draft-ietf-avt-vorbis-rtp-00.xml 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217
  1. <?xml version='1.0'?>
  2. <!DOCTYPE rfc SYSTEM 'rfc2629.dtd'>
  3. <?rfc toc="yes" ?>
  4. <?rfc compact='yes'?>
  5. <rfc ipr="full3667" docName="draft-ietf-avt-vorbis-rtp-00">
  6. <front>
  7. <title>RTP Payload Format for Vorbis Encoded Audio</title>
  8. <author initials="P" surname="Kerr" fullname="Phil Kerr">
  9. <organization>Xiph.Org</organization>
  10. <address>
  11. <email>phil@plus24.com</email>
  12. <uri>http://www.xiph.org/</uri>
  13. </address>
  14. </author>
  15. <date day="31" month="January" year="2005" />
  16. <area>General</area>
  17. <workgroup>AVT Working Group</workgroup>
  18. <keyword>I-D</keyword>
  19. <keyword>Internet-Draft</keyword>
  20. <keyword>Vorbis</keyword>
  21. <keyword>RTP</keyword>
  22. <abstract>
  23. <t>This document describes an RTP payload format for transporting Vorbis encoded audio. It details the RTP encapsulation
  24. mechanism for raw Vorbis data and details the delivery mechanisms for the decoder probability model, referred to as a
  25. codebook, metadata and other setup information.</t>
  26. <t>
  27. Also included within the document are the necessary details for the use of Vorbis with MIME and Session Description Protocol
  28. (SDP).
  29. </t>
  30. </abstract>
  31. <note title="Editors Note">
  32. <t>
  33. All references to RFC XXXX are to be replaced by references to the RFC number of this memo, when published.
  34. </t>
  35. </note>
  36. </front>
  37. <middle>
  38. <section anchor="Introduction" title="Introduction">
  39. <t>
  40. Vorbis is a general purpose perceptual audio codec intended to allow maximum encoder flexibility, thus allowing it to scale
  41. competitively over an exceptionally wide range of bitrates. At the high quality/bitrate end of the scale (CD or DAT rate
  42. stereo, 16/24 bits), it is in the same league as MPEG-2 and MPC. Similarly, the 1.0 encoder can encode high-quality CD and
  43. DAT rate stereo at below 48k bits/sec without resampling to a lower rate. Vorbis is also intended for lower and higher sample
  44. rates (from 8kHz telephony to 192kHz digital masters) and a range of channel representations (monaural, polyphonic, stereo,
  45. quadraphonic, 5.1, ambisonic, or up to 255 discrete channels).
  46. </t>
  47. <t>
  48. Vorbis encoded audio is generally encapsulated within an Ogg format bitstream <xref target="rfc3533"></xref>, which provides
  49. framing and synchronization. For the purposes of RTP transport, this layer is unnecessary, and so raw Vorbis packets are used
  50. in the payload.
  51. </t>
  52. <section anchor="Terminology" title="Terminology">
  53. <t>
  54. The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL"
  55. in this document are to be interpreted as described in RFC 2119 <xref target="rfc2119"></xref>.
  56. </t>
  57. </section>
  58. </section>
  59. <section anchor="Payload Format" title="Payload Format">
  60. <t>
  61. For RTP based transportation of Vorbis encoded audio the standard RTP header is followed by a 5 octet payload header, then the
  62. payload data. The payload headers are used to associate the Vorbis data with its associated decoding codebooks as well as
  63. indicating if the following packet contains fragmented Vorbis data and/or the number of whole Vorbis data frames. The
  64. payload data contains the raw Vorbis bitstream information.
  65. </t>
  66. <section anchor="RTP Header" title="RTP Header">
  67. <t>
  68. The format of the RTP header is specified in <xref target="rfc3550"></xref> and shown in Figure 1. This payload format uses the fields of the header in a manner consistent with that specification.
  69. </t>
  70. <t>
  71. <figure anchor="RTP Header Figure" title="RTP Header">
  72. <artwork><![CDATA[
  73. 0 1 2 3
  74. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  75. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  76. |V=2|P|X| CC |M| PT | sequence number |
  77. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  78. | timestamp |
  79. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  80. | synchronization source (SSRC) identifier |
  81. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  82. | contributing source (CSRC) identifiers |
  83. | ... |
  84. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  85. ]]></artwork>
  86. </figure>
  87. </t>
  88. <t>
  89. The RTP header begins with an octet of fields (V, P, X, and CC) to support specialized RTP uses (see <xref target="rfc3550">
  90. </xref> and <xref target="rfc3551"></xref> for details). For Vorbis RTP, the following values are used.
  91. </t>
  92. <t>
  93. Version (V): 2 bits</t>
  94. <t>
  95. This field identifies the version of RTP. The version used by this specification is two (2).
  96. </t>
  97. <t>
  98. Padding (P): 1 bit</t>
  99. <t>
  100. Padding MAY be used with this payload format according to section 5.1 of <xref target="rfc3550"></xref>.
  101. </t>
  102. <t>
  103. Extension (X): 1 bit</t>
  104. <t>
  105. The Extension bit is used in accordance with <xref target="rfc3550"></xref>.
  106. </t>
  107. <t>
  108. CSRC count (CC): 4 bits</t>
  109. <t>
  110. The CSRC count is used in accordance with <xref target="rfc3550"></xref>.
  111. </t>
  112. <t>
  113. Marker (M): 1 bit</t>
  114. <t>
  115. Set to zero. Audio silence suppression is not used. This conforms to section 4.1 of <xref target="vorbis-spec-ref"></xref>.
  116. </t>
  117. <t>
  118. Payload Type (PT): 7 bits</t>
  119. <t>
  120. An RTP profile for a class of applications is expected to assign a payload type for this format, or a dynamically allocated
  121. payload type SHOULD be chosen which designates the payload as Vorbis.
  122. </t>
  123. <t>
  124. Sequence number: 16 bits</t>
  125. <t>
  126. The sequence number increments by one for each RTP data packet sent, and may be used by the receiver to detect packet loss and
  127. to restore packet sequence. This field is detailed further in <xref target="rfc3550"></xref>.
  128. </t>
  129. <t>
  130. Timestamp: 32 bits</t>
  131. <t>
  132. A timestamp representing the sampling time of the first sample of the first Vorbis packet in the RTP packet. The clock frequency
  133. MUST be set to the sample rate of the encoded audio data and is conveyed out-of-band as an SDP attribute.
  134. </t>
  135. <t>
  136. SSRC/CSRC identifiers: </t>
  137. <t>
  138. These two fields, 32 bits each with one SSRC field and a maximum of 16 CSRC fields, are as defined in <xref target="rfc3550">
  139. </xref>.
  140. </t>
  141. </section>
  142. <section anchor="Payload Header" title="Payload Header">
  143. <t>
  144. After the RTP Header section the following five octets are the Payload Header. This header is split into a number of bitfields
  145. detailing the format of the following payload data packets.
  146. </t>
  147. <figure anchor="Payload Header Figure" title="Payload Header">
  148. <artwork><![CDATA[
  149. 0 1 2 3
  150. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  151. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  152. | Codebook Ident |
  153. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  154. |C|F|VDT|# pkts.|
  155. +-+-+-+-+-+-+-+-+
  156. ]]></artwork>
  157. </figure>
  158. <t>
  159. Codebook Ident: 32 bits</t>
  160. <t>
  161. This 32 bit field is used to associate the Vorbis data to a decoding Codebook. It is created by making a CRC32 checksum
  162. of the codebook required to decode the particular Vorbis audio stream.
  163. </t>
  164. <t>
  165. Continuation (C): 1 bit</t>
  166. <t>
  167. Set to one if this is a continuation of a fragmented packet.
  168. </t>
  169. <t>
  170. Fragmented (F): 1 bit</t>
  171. <t>
  172. Set to one if the payload contains complete packets or if it contains the last fragment of a fragmented packet.
  173. </t>
  174. <t>
  175. Vorbis Data Type (VDT): 2 bits</t>
  176. <t>
  177. This field sets the packet payload type for the Vorbis data. There are currently four types of Vorbis payload.
  178. </t>
  179. <vspace blankLines="1" />
  180. <list style="empty">
  181. <t> 0 = Raw Vorbis payload</t>
  182. <t> 1 = Vorbis Setup payload</t>
  183. <t> 2 = Vorbis Codebook payload</t>
  184. <t> 3 = Vorbis Metadata payload</t>
  185. </list>
  186. <t>
  187. The last 4 bits are the number of complete packets in this payload. This provides for a maximum number of 15 Vorbis
  188. packets in the payload. If the packet contains fragmented data the number of packets MUST be set to 0.
  189. </t>
  190. </section>
  191. <section anchor="Payload Data" title="Payload Data">
  192. <t>
  193. Raw Vorbis packets are unbounded in length currently, although at some future point there will likely be a practical
  194. limit placed on them. Typical Vorbis packet sizes are from very small (2-3 bytes) to quite large (8-12 kilobytes).
  195. The reference implementation <xref target="libvorbis"></xref> typically produces packets less than ~800 bytes, except for the
  196. codebook header packets which are ~4-12 kilobytes. Within an RTP context the maximum Vorbis packet size, including the
  197. RTP and payload headers, SHOULD be kept below the path MTU to avoid packet fragmentation.
  198. </t>
  199. <figure anchor="Payload Data Figure" title="Payload Data Header">
  200. <artwork><![CDATA[
  201. 0 1 2 3
  202. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  203. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  204. | length | vorbis packet data ..
  205. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  206. ]]></artwork>
  207. </figure>
  208. <t>
  209. Each Vorbis payload packet starts with a two octet length header, which is used to represent the size of the following
  210. data payload, followed by the raw Vorbis data.
  211. </t>
  212. <t>
  213. For payloads which consist of multiple Vorbis packets the payload data consists of the packet length followed by the
  214. packet data for each of the Vorbis packets in the payload.
  215. </t>
  216. <t>
  217. The Vorbis packet length header is the length of the Vorbis data block only and does not count the length field.
  218. </t>
  219. <t>
  220. The payload packing of the Vorbis data packets SHOULD follow the guidelines set-out in <xref target="rfc3551"></xref>
  221. where the oldest packet occurs immediately after the RTP packet header.
  222. </t>
  223. <t>
  224. Channel mapping of the audio is in accordance with ITU-R Recommendation BS.775-1 <xref target="775itu"></xref>.
  225. </t>
  226. </section>
  227. <section anchor="Example RTP Packet" title="Example RTP Packet">
  228. <t>
  229. Here is an example RTP packet containing two Vorbis packets.
  230. </t>
  231. <t>
  232. RTP Packet Header:
  233. </t>
  234. <figure anchor="Example Header Packet (RTP Headers)" title="Example Packet (RTP Headers)">
  235. <artwork><![CDATA[
  236. 0 1 2 3
  237. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  238. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  239. | 2 |0|0| 0 |0| PT | sequence number |
  240. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  241. | timestamp (in sample rate units) |
  242. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  243. | synchronisation source (SSRC) identifier |
  244. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  245. | contributing source (CSRC) identifiers |
  246. | ... |
  247. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  248. ]]></artwork>
  249. </figure>
  250. <t>
  251. Payload Data:
  252. </t>
  253. <figure anchor="Example Packet (Payload Data)" title="Example Packet (Payload Data)">
  254. <artwork><![CDATA[
  255. 0 1 2 3
  256. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  257. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  258. | Codebook Ident |
  259. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  260. |0|1| 0 | 2 pks | length | vorbis data ..
  261. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  262. .. vorbis data |
  263. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  264. | length | next vorbis packet data ..
  265. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  266. .. vorbis data |
  267. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  268. ]]></artwork>
  269. </figure>
  270. <t>
  271. The payload data section of the RTP packet starts with the 32 bit Codebook Ident field followed by the one octet
  272. configuration header, which has the number of Vorbis frames set to 2. Each of the Vorbis data frames is prefixed by the two
  273. octet length field.
  274. </t>
  275. </section>
  276. </section>
  277. <section anchor="Frame Packetizing" title="Frame Packetizing">
  278. <t>
  279. Each RTP packet contains either one complete Vorbis packet, one Vorbis packet fragment, or an integer number of complete Vorbis
  280. packets (up to a max of 15 packets, since the number of packets is defined by a 4 bit value).
  281. </t>
  282. <t>
  283. Any Vorbis data packet that is less than path MTU SHOULD be bundled in the RTP packet with as many Vorbis packets as will
  284. fit, up to a maximum of 15. Path MTU is detailed in <xref target="rfc1063"></xref> and <xref target="rfc1981"></xref>.
  285. </t>
  286. <t>
  287. If a Vorbis packet is larger than 65535 octets it MUST be fragmented. A fragmented packet has a zero in the last four bits
  288. of the payload header. Each fragment after the first will also set the Continuation (C) bit to one in the payload header. The
  289. RTP packet containing the last fragment of the Vorbis packet will have the Fragmented (F) bit set to one. To maintain the
  290. correct sequence for fragmented packet reception the timestamp field of fragmented packets MUST be the same as the first
  291. packet sent, with the sequence number incremented as normal for the subsequent RTP packets.
  292. </t>
  293. <section anchor="Example Fragmented Vorbis Packet" title="Example Fragmented Vorbis Packet">
  294. <t>
  295. Here is an example fragmented Vorbis packet split over three RTP packets. Each packet contains the standard RTP headers as
  296. well as the 5 octet Vorbis headers.
  297. </t>
  298. <figure anchor="Example Fragmented Packet (Packet 1)" title="Example Fragmented Packet (Packet 1)">
  299. <artwork><![CDATA[
  300. Packet 1:
  301. 0 1 2 3
  302. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  303. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  304. |V=2|P|X| CC |M| PT | 1000 |
  305. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  306. | xxxxx |
  307. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  308. | synchronization source (SSRC) identifier |
  309. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  310. | contributing source (CSRC) identifiers |
  311. | ... |
  312. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  313. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  314. | Codebook Ident |
  315. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  316. |0|0| 0 | 0| length | vorbis data ..
  317. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  318. .. vorbis data |
  319. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  320. ]]></artwork>
  321. </figure>
  322. <t>
  323. In this packet the initial sequence number is 1000 and the timestamp is xxxxx. The Continuation (C) bit is set to zero,
  324. indicating it is not the continuation of a fragmented packet, and the Fragmented (F) bit is set to 0 indicating it is a fragmented
  325. packet. The number of packets field is set to 0, and as the payload is raw Vorbis data the VDT field is set to 0.
  326. </t>
  327. <figure anchor="Example Fragmented Packet (Packet 2)" title="Example Fragmented Packet (Packet 2)">
  328. <artwork><![CDATA[
  329. Packet 2:
  330. 0 1 2 3
  331. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  332. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  333. |V=2|P|X| CC |M| PT | 1001 |
  334. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  335. | xxxxx |
  336. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  337. | synchronization source (SSRC) identifier |
  338. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  339. | contributing source (CSRC) identifiers |
  340. | ... |
  341. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  342. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  343. | Codebook Ident |
  344. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  345. |1|0| 0 | 0| length | vorbis data ..
  346. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  347. .. vorbis data |
  348. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  349. ]]></artwork>
  350. </figure>
  351. <t>
  352. The C bit is set to 1 and the number of packets field is set to 0. For large Vorbis fragments there can be several payload packets
  353. of this type. The maximum packet size SHOULD be no greater than the path MTU, including all RTP and payload headers. The
  354. sequence number has been incremented by one but the timestamp field remains the same as the initial packet.
  355. </t>
  356. <figure anchor="Example Fragmented Packet (Packet 3)" title="Example Fragmented Packet (Packet 3)">
  357. <artwork><![CDATA[
  358. Packet 3:
  359. 0 1 2 3
  360. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  361. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  362. |V=2|P|X| CC |M| PT | 1002 |
  363. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  364. | xxxxx |
  365. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  366. | synchronization source (SSRC) identifier |
  367. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  368. | contributing source (CSRC) identifiers |
  369. | ... |
  370. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  371. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  372. | Codebook Ident |
  373. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  374. |1|1| 0 | 0| length | vorbis data ..
  375. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  376. .. vorbis data |
  377. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  378. ]]></artwork>
  379. </figure>
  380. <t>
  381. This is the last Vorbis fragment packet. The C and F bits are set and the packet count remains set to 0. As in the previous
  382. packets the timestamp remains set to the first packet in the sequence and the sequence number has been incremented.
  383. </t>
  384. </section>
  385. <section anchor="Packet Loss" title="Packet Loss">
  386. <t>
  387. As there is no error correction within the Vorbis stream, packet loss will result in a loss of signal. Packet loss is more of an
  388. issue for fragmented Vorbis packets as the client will have to cope with the handling of the C and F flags. If we use the
  389. fragmented Vorbis packet example above and the first packet is lost the client SHOULD detect that the next packet has the packet
  390. count field set to 0 and the C bit is set and MUST drop it. The next packet, which is the final fragmented packet, SHOULD
  391. be dropped in the same manner, or buffered. Feedback reports on lost and dropped packets MUST be sent back via RTCP.
  392. </t>
  393. <t>
  394. If a particular multicast session has a large number of participants care must be taken to prevent an RTCP feedback implosion,
  395. <xref target="rtcp-feedback"></xref>, in the event of packet loss from a large number of participants.
  396. </t>
  397. <t>
  398. Loss of any of the configuration headers, detailed below, is dealt with in the Loss of Configuration Headers Section later.
  399. </t>
  400. </section>
  401. </section>
  402. <section anchor="Configuration Headers" title="Configuration Headers">
  403. <t>
  404. Unlike other mainstream audio codecs Vorbis has no statically configured probability model, instead it packs all entropy decoding
  405. configuration, VQ and Huffman models into a self-contained codebook. This codebook block also requires additional identification
  406. information detailing the number of audio channels, bitrates and other information used to initialise the Vorbis stream.
  407. </t>
  408. <t>
  409. To decode a Vorbis stream three configuration header blocks are needed. The first header indicates the sample and bitrates, the
  410. number of channels and the version of the Vorbis encoder used. The second header contains the decoder's probability model, or
  411. codebook and the third header details stream metadata.
  412. </t>
  413. <t>
  414. As the RTP stream may change certain configuration data mid-session there are two different methods for delivering this
  415. configuration data to a client, in-band and SDP which is detailed below. SDP delivery is used to set-up an initial
  416. state for the client application and in-band is used to change state during the session. The changes may be due to
  417. different metadata or codebooks as well as different bitrates of the stream.
  418. </t>
  419. <t>
  420. Of the two delivery vectors, the use of an SDP attribute to indicate a URI where the configuration and codebook data
  421. can be obtained is preferred as they can be fetched reliably using TCP. The in-band codebook delivery SHOULD
  422. only be used in situations where the link between the client and server is unidirectional or if the SDP-based information is not available.
  423. </t>
  424. <t>
  425. Synchronizing the configuration and codebook headers to the RTP stream is critical. The 32 bit Codebook Ident field is used
  426. to indicate when a change in the stream has taken place. The client application MUST have in advance the correct configuration
  427. and codebook headers and if the client detects a change in the Ident value and does not have this information it MUST NOT
  428. decode the raw Vorbis data.
  429. </t>
  430. <section anchor="In-band Header Transmission" title="In-band Header Transmission">
  431. <t>
  432. The three header data blocks are sent in-band with the packet type bits set to match the payload type. Normally the codebook
  433. and configuration headers are sent once per session if the stream is an encoding of live audio, as typically
  434. the encoder state will not change, but the encoder state can change at the boundary of chained Vorbis audio files. Metadata
  435. can be sent at the start as well as any time during the life of the session. Clients MUST be capable of dealing with periodic
  436. re-transmission of the configuration headers.
  437. </t>
  438. <section anchor="Setup Header" title="Setup Header">
  439. <t>
  440. A Vorbis Setup header is indicated with the payload type field set to 1.
  441. The Vorbis version MUST be set to zero to comply with this document. The fields Sample Rate, Bitrate Maximum/Nominal/Minimum
  442. and Num Audio Channels are set in accordance with <xref target="vorbis-spec-ref"></xref> with the bsz fields in the figure below referring
  443. to the blocksize parameters. The framing bit is not used for RTP transportation and so applications constructing Vorbis files
  444. MUST take care to set this if required.
  445. </t>
  446. <figure anchor="Setup Header Figure" title="Setup Header">
  447. <artwork><![CDATA[
  448. 0 1 2 3
  449. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  450. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  451. |V=2|P|X| CC |M| PT | xxxx |
  452. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  453. | xxxxx |
  454. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  455. | synchronization source (SSRC) identifier |
  456. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  457. | contributing source (CSRC) identifiers |
  458. | ... |
  459. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  460. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  461. | Codebook Ident |
  462. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  463. |0|1| 1 | 1| bsz 0 | bsz 1 | Num Audio Channels |
  464. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  465. | Vorbis Version |
  466. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  467. | Audio Sample Rate |
  468. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  469. | Bitrate Maximum |
  470. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  471. | Bitrate Nominal |
  472. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  473. | Bitrate Minimum |
  474. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  475. ]]></artwork>
  476. </figure>
  477. </section>
  478. <section anchor="Codebook Header" title="Codebook Header">
  479. <t>
  480. If the payload type field is set to 2, this indicates the packet contains Codebook data.
  481. </t>
  482. <t>
  483. The configuration information detailed below MUST be completely intact, as a client can not decode a stream with an
  484. incomplete or corrupted codebook set.
  485. </t>
  486. <t>
  487. A 16 bit codebook length field precedes the codebook datablock. The length field allows for codebooks to be up to 64K
  488. in size. Packet fragmentation, as per the Vorbis data, MUST be performed if the codebook's size exceeds the path MTU. The
  489. Codebook Ident field MUST be set to match the associated codebook needed to decode the Vorbis stream.
  490. </t>
  491. <t>
  492. The Codebook Ident is the CRC32 checksum of the codebook and is used to detect a corrupted codebook as well as associating
  493. it with its Vorbis data stream. This Ident value MUST NOT be set to the value of the current stream if this header is being
  494. sent before the boundary of the chained file has been reached. If a checksum failure is detected then this is considered to
  495. be a failure and MUST be reported to the client application.
  496. </t>
  497. <figure anchor="Codebook Header Figure" title="Codebook Header">
  498. <artwork><![CDATA[
  499. 0 1 2 3
  500. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  501. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  502. |V=2|P|X| CC |M| PT | xxxx |
  503. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  504. | xxxxx |
  505. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  506. | synchronization source (SSRC) identifier |
  507. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  508. | contributing source (CSRC) identifiers |
  509. | ... |
  510. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  511. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  512. | Codebook Ident |
  513. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  514. |0|1| 2 | 1| Codebook Length |
  515. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  516. | length | Codebook ..
  517. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  518. .. Codebook |
  519. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  520. ]]></artwork>
  521. </figure>
  522. <section anchor="Codebook CRC32 Generation" title="Codebook CRC32 Generation">
  523. <t>
  524. In order for different implementations of Vorbis RTP clients and servers to interoperate with each other a common format
  525. for the production of the CRC32 hash is required. The polynomial is X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0.
  526. </t>
  527. <t>
  528. The following C function SHOULD be used by implementations; if it is not, the code responsible for generating the CRC32
  529. value MUST use the polynomial given above.
  530. </t>
  531. <artwork><![CDATA[
  532. unsigned int crc32 (int length, unsigned char *crcdata) /* CRC32 of the first `length` bytes at crcdata */
  533. {
  534. int index, loop;
  535. unsigned int byte, crc, mask;
  536. index = 0;
  537. crc = 0xFFFFFFFF; /* register starts with the low 32 bits all set */
  538. while (index < length) { /* one pass per input byte */
  539. byte = crcdata [index];
  540. crc = crc ^ byte; /* fold the byte into the low 8 bits of the register */
  541. for (loop = 7; loop >= 0; loop--) { /* 8 iterations: one per bit of the byte */
  542. mask = -(crc & 1); /* all ones when the low bit is set, otherwise 0 */
  543. crc = (crc >> 1) ^ (0xEDB88320 & mask); /* 0xEDB88320 is the bit-reversed form of the polynomial */
  544. }
  545. index++;
  546. }
  547. return ~crc; /* result is the bitwise complement of the register */
  548. }
  549. ]]></artwork>
  550. </section>
  551. </section>
  552. <section anchor="Metadata Header" title="Metadata Header">
  553. <t>
  554. A payload type flag set to 3 indicates that the packet contains the comment metadata, such as artist name, track title
  555. and so on. These metadata messages are not intended to be fully descriptive but to offer basic track/song information. This
  556. message MUST be sent at the start of the stream, together with the setup and codebook headers, even if it contains no information.
  557. During a session the metadata associated with the stream may change from that specified at the start, e.g. a live concert
  558. broadcast changing acts/scenes, so clients MUST have the ability to receive Metadata header blocks. Details on the format of the
  559. comments can be found in the Vorbis documentation <xref target="v-comment"></xref>.
  560. </t>
  561. <t>
  562. The format for the data takes the form of a 32 bit codec vendor's name length field followed by the name encoded in UTF-8. The
  563. next 32 bit field denotes the number of user comments. Each of the user comments is prefixed by a 32 bit length field followed by
  564. the comment text.
  565. </t>
  566. <figure anchor="Metadata Header Figure" title="Metadata Header">
  567. <artwork><![CDATA[
  568. 0 1 2 3
  569. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  570. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  571. |V=2|P|X| CC |M| PT | xxxx |
  572. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  573. | xxxxx |
  574. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  575. | synchronization source (SSRC) identifier |
  576. +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
  577. | contributing source (CSRC) identifiers |
  578. | ... |
  579. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  580. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  581. | Codebook Ident |
  582. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  583. |0|1| 3 | 1| Vendor string length |
  584. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  585. | length | Vendor string ..
  586. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  587. | User comments list length |
  588. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  589. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  590. | User comment length |
  591. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  592. | User comment ..
  593. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  594. .. User comment |
  595. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  596. ]]></artwork>
  597. </figure>
  598. </section>
  599. </section>
  600. <section anchor="Packed Headers Delivery" title="Packed Headers Delivery">
  601. <t>
  602. As mentioned above the RECOMMENDED delivery vector for Vorbis configuration data is via an SDP attribute as this retrieval method
  603. can be performed using a reliable transport protocol.
  604. </t>
  605. <figure anchor="Packed Headers Overview Figure" title="Packed Headers Overview">
  606. <artwork><![CDATA[
  607. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  608. | Number of packed headers |
  609. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  610. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  611. | Packed header |
  612. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  613. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  614. | Packed header |
  615. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  616. ]]></artwork>
  617. </figure>
  618. <t>
  619. As the RTP headers are not required for this method of delivery the
  620. structure of the configuration data is slightly different. The packed header starts with a 32 bit count field which details the number of packed headers that are contained in the bundle. Next is the packed header payload for each chained Vorbis file.
  621. </t>
  622. <figure anchor="Packed Headers Detail Figure" title="Packed Headers Detail">
  623. <artwork><![CDATA[
  624. 0 1 2 3
  625. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  626. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  627. | Header Length |
  628. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  629. | Codebook Ident |
  630. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  631. | Setup Header ..
  632. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  633. .. Setup Header |
  634. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  635. | Codebook Header ..
  636. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  637. .. Codebook Header |
  638. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  639. | Metadata Header ..
  640. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  641. .. Metadata Header |
  642. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  643. ]]></artwork>
  644. </figure>
  645. <t>The key difference from the in-band format is that there is no need for the payload header octet and Codebook Ident field.
  646. Below are examples of the packed headers format.
  647. </t>
  648. <figure anchor="Packed Setup Header Figure" title="Packed Setup Header">
  649. <artwork><![CDATA[
  650. 0 1 2 3
  651. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  652. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  653. |0|1| 2 | 1| bsz 0 | bsz 1 | Num Audio Channels |
  654. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  655. | Vorbis Version |
  656. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  657. | Audio Sample Rate |
  658. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  659. | Bitrate Maximum |
  660. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  661. | Bitrate Nominal |
  662. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  663. | Bitrate Minimum |
  664. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  665. ]]></artwork>
  666. </figure>
  667. <t>
  668. The alignment of the packed Setup Header is slightly different from the RTP payload type as the payload header is not used.
  669. </t>
  670. <figure anchor="Packed Codebook Header Figure" title="Packed Codebook Header">
  671. <artwork><![CDATA[
  672. 0 1 2 3
  673. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  674. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  675. | Codebook Length |
  676. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  677. | Codebook ..
  678. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  679. .. Codebook |
  680. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  681. ]]></artwork>
  682. </figure>
  683. <t>
  684. The packed Codebook header also has a slightly different structure to that of the RTP payload type. The Codebook Ident field that
  685. is normally part of this structure is moved to the second field of the overall packed structure.
  686. </t>
  687. <figure anchor="Packed Metadata Header Figure" title="Packed Metadata Header">
  688. <artwork><![CDATA[
  689. 0 1 2 3
  690. 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  691. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  692. | Vendor string length |
  693. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  694. | Vendor string |
  695. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  696. | User comments list length |
  697. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  698. | User comment length / User comment ..
  699. +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  700. ]]></artwork>
  701. </figure>
  702. <t>
  703. The packed Metadata header also has a slightly different structure to that of the RTP payload type with the payload header not being used.
  704. </t>
  705. <section anchor="Packed Headers IANA Considerations" title="Packed Headers IANA Considerations">
  706. <t>
  707. The following IANA considerations MUST only be applied to the packed headers.
  708. </t>
  709. <t>
  710. MIME media type name: audio
  711. </t>
  712. <t>
  713. MIME subtype: vorbis-config
  714. </t>
  715. <t>
  716. Required Parameters:</t><t>
  717. None.
  718. </t>
  719. <t>
  720. Optional Parameters: </t><t>
  721. None.
  722. </t>
  723. <t>
  724. Encoding considerations:</t><t>
  725. This type is only defined for transfer via HTTP as specified in RFC XXXX.
  726. </t>
  727. <t>
  728. Security Considerations:</t><t>
  729. See Section 6 of RFC 3047.
  730. </t>
  731. <t>
  732. Interoperability considerations: none
  733. </t>
  734. <t>
  735. Published specification:</t>
  736. <t>See RFC XXXX for details.</t>
  737. <t>
  738. Applications which use this media type:</t><t>
  739. Vorbis encoded audio, configuration data.
  740. </t>
  741. <t>
  742. Additional information: none
  743. </t>
  744. <t>
  745. Person &amp; email address to contact for further information:</t><t>
  746. Phil Kerr: &lt;phil@plus24.com&gt;
  747. </t>
  748. <t>
  749. Intended usage: COMMON
  750. </t>
  751. <t>Author/Change controller:</t>
  752. <t>Author: Phil Kerr</t>
  753. <t>Change controller: IETF AVT Working Group</t>
  754. </section>
  755. </section>
  756. <section anchor="Codebook Caching" title="Codebook Caching">
  757. <t>
  758. Codebook caching allows clients that have previously connected to a stream to re-use the associated codebooks and configuration
  759. data. When a client receives a codebook it may store it locally and can compare the CRC32 key with that of the new stream and
  760. begin decoding before it has received any of the headers.
  761. </t>
  762. </section>
  763. <section anchor="Loss of Configuration Headers" title="Loss of Configuration Headers">
  764. <t>
  765. Unlike the loss of raw Vorbis payload data, loss of a configuration header can lead to a situation where it will not be possible
  766. to successfully decode the stream.
  767. </t>
  768. <t>
  769. Out of the three headers, loss of either the Codebook or Setup headers MUST result in the halting of stream decoding.
  770. Loss of the Metadata header SHOULD NOT be regarded as fatal for decoding. Loss of any of the headers SHOULD be reported to the
  771. client as well as a loss report sent via RTCP.
  772. </t>
  773. </section>
  774. </section>
  775. <section anchor="IANA Considerations" title="IANA Considerations">
  776. <t>
  777. MIME media type name: audio
  778. </t>
  779. <t>
  780. MIME subtype: vorbis
  781. </t>
  782. <t>
  783. Required Parameters:</t><t>
  784. header indicates the URI of the decoding configuration headers.
  785. </t>
  786. <t>
  787. Optional Parameters: </t><t>
  788. None.
  789. </t>
  790. <t>
  791. Encoding considerations:</t><t>
  792. This type is only defined for transfer via RTP as specified
  793. in RFC XXXX.
  794. </t>
  795. <t>
  796. Security Considerations:</t><t>
  797. See Section 6 of RFC 3047.
  798. </t>
  799. <t>
  800. Interoperability considerations: none
  801. </t>
  802. <t>
  803. Published specification:</t>
  804. <t>See the Vorbis documentation <xref target="vorbis-spec-ref"></xref> for details.</t>
  805. <t>
  806. Applications which use this media type:</t><t>
  807. Audio streaming and conferencing tools
  808. </t>
  809. <t>
  810. Additional information: none
  811. </t>
  812. <t>
  813. Person &amp; email address to contact for further information:</t><t>
  814. Phil Kerr: &lt;phil@plus24.com&gt;
  815. </t>
  816. <t>
  817. Intended usage: COMMON
  818. </t>
  819. <t>Author/Change controller:</t>
  820. <t>Author: Phil Kerr</t>
  821. <t>Change controller: IETF AVT Working Group</t>
  822. <section anchor="Mapping MIME Parameters into SDP" title="Mapping MIME Parameters into SDP">
  823. <t>
  824. The information carried in the MIME media type specification has a specific mapping to fields in the Session Description
  825. Protocol (SDP) <xref target="rfc2327"></xref>, which is commonly used to describe RTP sessions. When SDP is used to specify
  826. sessions the mappings are as follows:
  827. </t>
  828. <vspace blankLines="1" />
  829. <list style="symbols">
  830. <t>The MIME type ("audio") goes in SDP "m=" as the media name.</t>
  831. <vspace blankLines="1" />
  832. <t>The MIME subtype ("VORBIS") goes in SDP "a=rtpmap" as the encoding name.</t>
  833. <vspace blankLines="1" />
  834. <t>The parameter "rate" also goes in "a=rtpmap" as clock rate.</t>
  835. <vspace blankLines="1" />
  836. <t>The parameter "channels" also goes in "a=rtpmap" as channel count.</t>
  837. <vspace blankLines="1" />
  838. <t>The parameter "header" goes in the SDP "a=fmtp" attribute.</t>
  839. </list>
  840. <t>
  841. If the stream comprises chained Vorbis files the configuration and codebook headers for each file SHOULD be packaged together
  842. and passed to the client using the header attribute if all the files to be played are known in advance.
  843. </t>
  844. <t>
  845. The Vorbis configuration specified in the header attribute MUST contain all of the configuration data and codebooks needed for
  846. the life of the session.
  847. </t>
  848. <t>
  849. The port value is specified by the server application bound to the address specified in the c attribute. The bitrate value
  850. and channels specified in the rtpmap attribute MUST match the Vorbis sample rate value. An example is found below.
  851. </t>
  852. <vspace blankLines="1" />
  853. <list style="empty">
  854. <t>c=IN IP4/6 </t>
  855. <t>m=audio RTP/AVP 98</t>
  856. <t>a=rtpmap:98 VORBIS/44100/2</t>
  857. <t>a=fmtp:98 header=&lt;URL of configuration header&gt; </t>
  858. </list>
  859. <t>
  860. Note that the payload format (encoding) names are commonly shown in upper case. MIME subtypes are commonly shown in lower
  861. case. These names are case-insensitive in both places. Similarly, parameter names are case-insensitive both in MIME types and
  862. in the default mapping to the SDP a=fmtp attribute. The exception regarding case sensitivity is the configuration header URL
  863. which MUST be regarded as being case sensitive.
  864. </t>
  865. <t>
  866. The answer to any offer, <xref target="rfc3264"></xref>, MUST NOT change the URL specified in the header attribute.
  867. </t>
  868. </section>
  869. </section>
  870. <section anchor="Congestion Control" title="Congestion Control">
  871. <t>
  872. Vorbis clients SHOULD send regular receiver reports detailing congestion. A mechanism for dynamically downgrading the stream,
  873. known as bitrate peeling, will allow for a graceful backing off of the stream bitrate. This feature is not available at present
  874. so an alternative would be to redirect the client to a lower bitrate stream if one is available.
  875. </t>
  876. <t>
  877. If a particular multicast session has a large number of participants care must be taken to prevent an RTCP feedback implosion,
  878. <xref target="rtcp-feedback"></xref>, in the event of congestion.
  879. </t>
  880. </section>
  881. <section anchor="Security Considerations" title="Security Considerations">
  882. <t>
  883. RTP packets using this payload format are subject to the security considerations discussed in the RTP specification
  884. <xref target="rfc3550"></xref>. This implies that the confidentiality of the media stream is achieved by using
  885. encryption. Because the data compression used with this payload format is applied end-to-end, encryption may be performed on the
  886. compressed data. Where the size of a data block is set care MUST be taken to prevent buffer overflows in the client applications.
  887. </t>
  888. </section>
  889. <section anchor="Acknowledgments" title="Acknowledgments">
  890. <t>
  891. This document is a continuation of draft-moffitt-vorbis-rtp-00.txt. The MIME type section is a continuation of
  892. draft-short-avt-rtp-vorbis-mime-00.txt.
  893. </t>
  894. <t>
  895. Thanks to the AVT, Ogg Vorbis Communities / Xiph.org including Steve Casner, Aaron Colwell, Ross Finlayson, Fluendo, Ramon Garcia,
  896. Pascal Hennequin, Ralph Giles, Tor-Einar Jarnbjo, Colin Law, John Lazzaro, Jack Moffitt, Christopher Montgomery,
  897. Colin Perkins, Barry Short, Mike Smith, Michael Sparks, Magnus Westerlund.
  898. </t>
  899. </section>
  900. </middle>
  901. <back>
  902. <references title="Normative References">
  903. <reference anchor="rfc3533">
  904. <front>
  905. <title>The Ogg Encapsulation Format Version 0</title>
  906. <author initials="S." surname="Pfeiffer" fullname="Silvia Pfeiffer"></author>
  907. </front>
  908. <seriesInfo name="RFC" value="3533" />
  909. </reference>
  910. <reference anchor="rfc2119">
  911. <front>
  912. <title>Key words for use in RFCs to Indicate Requirement Levels </title>
  913. <author initials="S." surname="Bradner" fullname="Scott Bradner"></author>
  914. </front>
  915. <seriesInfo name="RFC" value="2119" />
  916. </reference>
  917. <reference anchor="rfc3550">
  918. <front>
  919. <title>RTP: A Transport Protocol for real-time applications</title>
  920. <author initials="H." surname="Schulzrinne" fullname=""></author>
  921. <author initials="S." surname="Casner" fullname=""></author>
  922. <author initials="R." surname="Frederick" fullname=""></author>
  923. <author initials="V." surname="Jacobson" fullname=""></author>
  924. </front>
  925. <seriesInfo name="RFC" value="3550" />
  926. </reference>
  927. <reference anchor="rfc3551">
  928. <front>
  929. <title>RTP Profile for Audio and Video Conferences with Minimal Control.</title>
  930. <author initials="H." surname="Schulzrinne" fullname=""></author>
  931. <author initials="S." surname="Casner" fullname=""></author>
  932. </front>
  933. <date month="July" year="2003" />
  934. <seriesInfo name="RFC" value="3551" />
  935. </reference>
  936. <reference anchor="rfc2327">
  937. <front>
  938. <title>SDP: Session Description Protocol</title>
  939. <author initials="M." surname="Handley" fullname="Mark Handley"></author>
  940. <author initials="V." surname="Jacobson" fullname="Van Jacobson"></author>
  941. </front>
  942. <seriesInfo name="RFC" value="2327" />
  943. </reference>
  944. <reference anchor="rfc1063">
  945. <front>
  946. <title>Path MTU Discovery</title>
  947. <author initials="J." surname="Mogul et al." fullname="J. Mogul et al."></author>
  948. </front>
  949. <seriesInfo name="RFC" value="1063" />
  950. </reference>
  951. <reference anchor="rfc1981">
  952. <front>
  953. <title>Path MTU Discovery for IP version 6</title>
  954. <author initials="J." surname="McCann et al." fullname="J. McCann et al."></author>
  955. </front>
  956. <seriesInfo name="RFC" value="1981" />
  957. </reference>
  958. <reference anchor="rfc3264">
  959. <front>
  960. <title>An Offer/Answer Model with Session Description Protocol (SDP)</title>
  961. <author initials="J." surname="Rosenberg" fullname="Jonathan Rosenberg"></author>
  962. <author initials="H." surname="Schulzrinne" fullname="Henning Schulzrinne"></author>
  963. </front>
  964. <seriesInfo name="RFC" value="3264" />
  965. </reference>
  966. <reference anchor="rtcp-feedback">
  967. <front>
  968. <title>Extended RTP Profile for RTCP-based Feedback (RTP/AVPF)</title>
  969. <author initials="J." surname="Ott" fullname="Joerg Ott"></author>
  970. <author initials="S." surname="Wenger" fullname="Stephan Wenger"></author>
  971. <author initials="N." surname="Sato" fullname="Noriyuki Sato"></author>
  972. <author initials="C." surname="Burmeister" fullname="Carsten Burmeister"></author>
  973. <author initials="J." surname="Rey" fullname="Jose Rey"></author>
  974. </front>
  975. <seriesInfo name="Internet Draft" value="(draft-ietf-avt-rtcp-feedback-11: Work in progress)" />
  976. </reference>
  977. </references>
  978. <references title="Informative References">
  979. <reference anchor="libvorbis">
  980. <front>
  981. <title>libvorbis: Available from the Xiph website, http://www.xiph.org</title>
  982. </front>
  983. </reference>
  984. <reference anchor="vorbis-spec-ref">
  985. <front>
  986. <title>Ogg Vorbis I specification: Codec setup and packet decode. Available from the Xiph website, http://www.xiph.org</title>
  987. </front>
  988. </reference>
  989. <reference anchor="v-comment">
  990. <front>
  991. <title>Ogg Vorbis I specification: Comment field and header specification. Available from the Xiph website,
  992. http://www.xiph.org</title>
  993. </front>
  994. </reference>
  995. <reference anchor="775itu">
  996. <front>
  997. <title>ITU (1992-1994) ITU-R Recommendation BS. 775-1 Multi-channel stereophonic sound system with or without accompanying
  998. picture. International Telecommunications Union. Available from the ITU website, http://www.itu.int</title>
  999. </front>
  1000. </reference>
  1001. </references>
  1002. </back>
  1003. </rfc>