123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890 |
- #LyX 2.1 created this file. For more info see http://www.lyx.org/
- \lyxformat 474
- \begin_document
- \begin_header
- \textclass article
- \use_default_options true
- \maintain_unincluded_children false
- \language english
- \language_package default
- \inputencoding auto
- \fontencoding global
- \font_roman default
- \font_sans default
- \font_typewriter default
- \font_math auto
- \font_default_family default
- \use_non_tex_fonts false
- \font_sc false
- \font_osf false
- \font_sf_scale 100
- \font_tt_scale 100
- \graphics default
- \default_output_format default
- \output_sync 0
- \bibtex_command default
- \index_command default
- \paperfontsize default
- \use_hyperref false
- \papersize default
- \use_geometry false
- \use_package amsmath 1
- \use_package amssymb 1
- \use_package cancel 0
- \use_package esint 1
- \use_package mathdots 1
- \use_package mathtools 0
- \use_package mhchem 1
- \use_package stackrel 0
- \use_package stmaryrd 0
- \use_package undertilde 0
- \cite_engine basic
- \cite_engine_type default
- \biblio_style plain
- \use_bibtopic false
- \use_indices false
- \paperorientation portrait
- \suppress_date false
- \justification true
- \use_refstyle 1
- \index Index
- \shortcut idx
- \color #008000
- \end_index
- \secnumdepth 3
- \tocdepth 3
- \paragraph_separation indent
- \paragraph_indentation default
- \quotes_language english
- \papercolumns 1
- \papersides 1
- \paperpagestyle default
- \tracking_changes false
- \output_changes false
- \html_math_output 0
- \html_css_as_file 0
- \html_be_strict false
- \end_header
- \begin_body
- \begin_layout Title
- PVQ Encoding with Non-Uniform Distribution
- \end_layout
- \begin_layout Author
- Jean-Marc Valin, Timothy B.
- Terriberry
- \end_layout
- \begin_layout Section
- Introduction
- \end_layout
- \begin_layout Standard
- The pyramid vector quantizer (PVQ) is common form of algebraic vector quantizati
- on.
- It is useful in the context of both audio and video compression.
- The PVQ codebook is defined
- \end_layout
- \begin_layout Standard
- \begin_inset Formula
- \begin{equation}
- S\left(N,K\right)=\left\{ \mathbf{y}\in\mathbb{Z^{N}}:\sum_{i=0}^{N-1}\left|y_{i}\right|=K\right\} \ ,
- \end{equation}
- \end_inset
- the set of all integer vectors in
- \begin_inset Formula $N$
- \end_inset
- dimensions for which the sum of absolute values equals
- \begin_inset Formula $K$
- \end_inset
- .
- When all codevectors are considered to have equal probability, several
- methods
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset citation
- LatexCommand cite
- key "Fischer,FPC"
- \end_inset
- exist to convert between any codevector and an index
- \begin_inset Formula $J$
- \end_inset
- in the range
- \begin_inset Formula $[0,\, V(N,K)-1]$
- \end_inset
- , where
- \begin_inset Formula $V(N,K)$
- \end_inset
- is the number of elements in
- \begin_inset Formula $S(N,K)$
- \end_inset
- .
- The index is then easily coded in a bit-stream, possibly with the use of
- a range coder
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset citation
- LatexCommand cite
- key "RFC6716"
- \end_inset
- to allow for fractional bits since
- \begin_inset Formula $V(N,K)$
- \end_inset
- is generally not a power of two.
- The equal-probability case is common for audio.
- For video, transform coefficients (e.g.
- DCT) or any prediction residual for a block tend to have widely different
- distributions.
- For this reason, using a uniform probability model.
- This document proposes a way to efficiently encode the result of PVQ quantizati
- on with such non-uniform distributions.
- \end_layout
- \begin_layout Section
- Non-Uniform Distribution
- \end_layout
- \begin_layout Standard
- The non-uniform probability distribution of the codevectors requires building
- a probability model.
- For any codebook of reasonable size, explicitly modelling the distribution
- of
- \begin_inset Formula $J$
- \end_inset
- itself is impractical since
- \begin_inset Formula $V(N,K)$
- \end_inset
- can easily exceed 32
- \begin_inset space ~
- \end_inset
- bits.
- Instead, we use parametric models for the distribution of
- \begin_inset Formula $\left|y_{i}\right|$
- \end_inset
- as a function of
- \begin_inset Formula $i$
- \end_inset
- .
- Sections
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "sub:Coefficient-model"
- \end_inset
- and
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "sub:Run-Length-Model"
- \end_inset
- present two possible models for encoding non-uniform PVQ parameters.
- Both models assume the use of a range/arithmetic coder, ideally one that
- is capable of encoding non-binary symbols.
- In most cases, the probability distribution functions (pdf) can be stored
- in a lookup table in the form of cumulative distribution functions (cdf)
- that can be used directly by the encoder and decoder.
-
- \end_layout
- \begin_layout Subsection
- Coefficient Magnitude Model
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset label
- LatexCommand label
- name "sub:Coefficient-model"
- \end_inset
- The coefficient magnitude (CM) model is based on the expected absolute value
- of the coefficient
- \begin_inset Formula $i$
- \end_inset
- \end_layout
- \begin_layout Standard
- \begin_inset Formula
- \begin{equation}
- \sigma_{i}=E\left\{ \left|y_{i}\right|\right\} =\sum_{k=0}^{\infty}p_{i}(k)\ ,
- \end{equation}
- \end_inset
- where
- \begin_inset Formula $p_{i}\left(k\right)$
- \end_inset
- is the probability that
- \begin_inset Formula $\left|y_{i}\right|=k$
- \end_inset
- .
- We assume that
- \begin_inset Formula $y$
- \end_inset
- is the result of quantizing
- \begin_inset Formula $x$
- \end_inset
- to the nearest integer, where
- \begin_inset Formula $x$
- \end_inset
- follows a Laplace distribution
- \begin_inset Formula
- \begin{equation}
- p\left(x\right)=r^{-\left|x\right|}\ .
- \end{equation}
- \end_inset
- Assuming the positive quantization thresholds are
- \begin_inset Formula $\theta+k,\, k\in\mathbb{N}$
- \end_inset
- , we have
- \begin_inset Formula
- \begin{equation}
- p\left(k\right)=\left\{ \begin{array}{ll}
- 1-r^{\theta} & ,\ k=0\\
- r^{\theta}\left(1-r\right)r^{k-1}\quad & ,\ k\neq0
- \end{array}\right.\ .
- \end{equation}
- \end_inset
- The value of
- \begin_inset Formula $r$
- \end_inset
- is obtained by modelling
- \begin_inset Formula $\sigma_{i}$
- \end_inset
- .
- By assuming
- \begin_inset Formula $\theta=1$
- \end_inset
- , we can have a simple relation for
- \begin_inset Formula $r$
- \end_inset
-
- \begin_inset Formula
- \begin{equation}
- r=\frac{\sigma_{i}}{1+\sigma_{i}}\ .\label{eq:r-vs-sigma}
- \end{equation}
- \end_inset
- We can still use
- \begin_inset Formula $\theta\neq1$
- \end_inset
- to model
- \begin_inset Formula $p\left(k\right)$
- \end_inset
- itself, in which case
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset ref
- LatexCommand eqref
- reference "eq:r-vs-sigma"
- \end_inset
- becomes an approximation.
- Typically,
- \begin_inset Formula $\theta$
- \end_inset
- is in the range
- \begin_inset Formula $\left[\frac{1}{2},1\right]$
- \end_inset
- .
- For efficiency reasons, we pre-compute the cdf corresponding to
- \begin_inset Formula $p\left(k\right)$
- \end_inset
- for different values of
- \begin_inset Formula $r$
- \end_inset
- .
- \end_layout
- \begin_layout Standard
- If all values
- \begin_inset Formula $y_{i}$
- \end_inset
- are identically distributed, then all expectiations
- \begin_inset Formula $\sigma_{i}$
- \end_inset
- are equal and simply
- \begin_inset Formula $\sigma_{i}=K/N$
- \end_inset
- .
- In practice, we assume that the values
- \begin_inset Formula $y_{i}$
- \end_inset
- are in decreasing order of expected value and make the approximation
- \begin_inset Formula
- \begin{equation}
- \sigma_{0}=\alpha K/N\ ,
- \end{equation}
- \end_inset
- where
- \begin_inset Formula $\alpha$
- \end_inset
- represents how uneven the distributions are (
- \begin_inset Formula $\alpha=1$
- \end_inset
- corresponds to identical distributions).
- Knowing
- \begin_inset Formula $\alpha$
- \end_inset
- , we can obtain
- \begin_inset Formula $\sigma_{0}$
- \end_inset
- ,
- \begin_inset Formula $r_{0}$
- \end_inset
- and thus
- \begin_inset Formula $p_{0}\left(k\right)$
- \end_inset
- , making it possible to encode (and decode using the same process)
- \begin_inset Formula $y_{0}$
- \end_inset
- .
- Knowing the value of
- \begin_inset Formula $y_{0}$
- \end_inset
- , we can encode
- \begin_inset Formula $y_{1}$
- \end_inset
- using
- \begin_inset Formula
- \begin{align}
- N^{\left(1\right)} & =N-1\nonumber \\
- K^{\left(1\right)} & =K-\left|y_{0}\right|\ .
- \end{align}
- \end_inset
- The process can be applied recursively until
- \begin_inset Formula $K=0$
- \end_inset
- or
- \begin_inset Formula $N=1$
- \end_inset
- .
- The coefficient
- \begin_inset Formula $\alpha$
- \end_inset
- is assumed constant across a vector and adapted between vectors based on
- the observed expectation
- \begin_inset Formula $\sigma_{i}$
- \end_inset
- as a function of
- \begin_inset Formula $K$
- \end_inset
- and
- \begin_inset Formula $N$
- \end_inset
- .
- Similar to a linear regression, we have
- \begin_inset Formula
- \begin{equation}
- \alpha=\frac{S_{y}}{S_{E}}\ ,
- \end{equation}
- \end_inset
- where for new vector, we update
- \begin_inset Formula $S_{y}$
- \end_inset
- and
- \begin_inset Formula $S_{E}$
- \end_inset
- as
- \begin_inset Formula
- \begin{align}
- S_{y} & \leftarrow\left(1-\eta\right)S_{y}+\eta\sum\left|y_{i}\right|\\
- S_{E} & \leftarrow\left(1-\eta\right)S_{E}+\eta\sum\left(K^{\left(i\right)}/N^{\left(i\right)}\right)\ ,
- \end{align}
- \end_inset
- where
- \begin_inset Formula $\eta$
- \end_inset
- controls the adaptation rate.
-
- \end_layout
- \begin_layout Standard
- The total number of symbols coded with this approach is equal to the position
-
- \begin_inset Formula $i_{last}$
- \end_inset
- of the last non-zero component of
- \begin_inset Formula $\mathbf{y}$
- \end_inset
- .
- \end_layout
- \begin_layout Subsection
- Run-Length Model
- \end_layout
- \begin_layout Standard
- \begin_inset CommandInset label
- LatexCommand label
- name "sub:Run-Length-Model"
- \end_inset
- For long sparse vectors, the method in Section
- \begin_inset space ~
- \end_inset
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "sub:Coefficient-model"
- \end_inset
- is inefficient in terms of symbols coded.
- In those cases the run-length (RL) model models
- \begin_inset Formula $q(n)$
- \end_inset
- , the probability of
- \begin_inset Formula $y_{n}$
- \end_inset
- being the first non-zero coefficient in
- \begin_inset Formula $\mathbf{y}$
- \end_inset
- , as a truncated exponential distribution
- \begin_inset Formula
- \begin{equation}
- q\left(n\right)=C_{1}\left\{ \begin{array}{ll}
- r^{-n}\quad & ,\ n<N\\
- 0 & ,\ n\geq N
- \end{array}\right.\ ,
- \end{equation}
- \end_inset
- where
- \begin_inset Formula $C_{1}$
- \end_inset
- is a normalization constant.
- We then model the expected value of
- \begin_inset Formula $q(n)$
- \end_inset
- as
- \begin_inset Formula
- \begin{equation}
- \sigma_{n}=E\left[q\left(n\right)\right]=\beta\cdot\frac{N}{K}\ ,
- \end{equation}
- \end_inset
- where
- \begin_inset Formula $\beta=1$
- \end_inset
- represents the case where non-zero coefficients are distributed evenly
- in the vector (typically,
- \begin_inset Formula $\beta<1$
- \end_inset
- ).
-
- \end_layout
- \begin_layout Standard
- The relationship between
- \begin_inset Formula $\sigma_{n}$
- \end_inset
- and
- \begin_inset Formula $r$
- \end_inset
- is given by
- \begin_inset Formula
- \begin{equation}
- \sigma_{n}=\frac{r^{N}-Nr+N-1}{r^{N-1}\left(1-r\right)^{N}}\ ,
- \end{equation}
- \end_inset
- so computing
- \begin_inset Formula $r$
- \end_inset
- from
- \family roman
- \series medium
- \shape up
- \size normal
- \emph off
- \bar no
- \strikeout off
- \uuline off
- \uwave off
- \noun off
- \color none
- \begin_inset Formula $\sigma_{n}$
- \end_inset
- is not easy.
- We use the approximation
- \begin_inset Formula
- \begin{equation}
- r\approx\frac{\sigma_{n}}{1+\sigma_{n}}+\frac{8\sigma_{n}^{2}}{\left(N+1\right)\left(N-1\right)^{2}}\ .
- \end{equation}
- \end_inset
- \end_layout
- \begin_layout Standard
- Once the position
- \begin_inset Formula $n$
- \end_inset
- of the first non-zero coefficient is coded, one pulse is subtracted from
- that position and the process is restarted with
- \begin_inset Formula
- \begin{align}
- N^{(1)} & =N-n\nonumber \\
- K^{(1)} & =K-1\ .
- \end{align}
- \end_inset
- If multiple pulses are present at a certain position, then we encode a position
- of zero for each pulse that follows the first pulse.
-
- \end_layout
- \begin_layout Standard
- Because the sign is fixed once a pulse is already present at a certain
- position, the probability of adding a pulse is divided by two.
- The distribution then becomes
- \begin_inset Formula
- \begin{equation}
- q(n)=C_{2}\left\{ \begin{array}{ll}
- 1/2\quad & ,\ n=0\\
- r^{-n} & ,\ 0<n<N\\
- 0 & ,\ n\geq N
- \end{array}\right..
- \end{equation}
- \end_inset
- The
- \begin_inset Formula $\beta$
- \end_inset
- parameters is adapted in a similar way as the
- \begin_inset Formula $\alpha$
- \end_inset
- parameter in
- \begin_inset CommandInset ref
- LatexCommand ref
- reference "sub:Coefficient-model"
- \end_inset
- :
- \begin_inset Formula
- \begin{equation}
- \beta=\frac{S_{p}}{S_{N}}\ ,
- \end{equation}
- \end_inset
- where for each new vector, we update
- \begin_inset Formula $S_{p}$
- \end_inset
- and
- \begin_inset Formula $S_{N}$
- \end_inset
- as
- \begin_inset Formula
- \begin{align}
- S_{p} & \leftarrow\left(1-\eta\right)S_{p}+\eta\sum n^{(i)}K^{(i)}\\
- S_{N} & \leftarrow\left(1-\eta\right)S_{N}+\eta\sum N^{\left(i\right)}\ ,
- \end{align}
- \end_inset
- where
- \begin_inset Formula $\eta$
- \end_inset
- controls the adaptation rate.
-
- \end_layout
- \begin_layout Subsection
- Model Combinations
- \end_layout
- \begin_layout Standard
- It is possible to combine the CM and RL models to improve coding performance
- and computational efficiency.
- For small values of
- \begin_inset Formula $K$
- \end_inset
- , the RL model tends to have similar efficiency as the CM model, but a much
- lower complexity due to the smaller number of symbols.
- For larger
- \begin_inset Formula $K$
- \end_inset
- values, the RL model tends to lose efficiency and becomes more complex.
- Because
- \begin_inset Formula $K$
- \end_inset
- is known in advance, the encoder can choose between CM and RL at run-time
- based on
- \begin_inset Formula $K$
- \end_inset
- .
- The decoder has access to the same information and can thus choose the
- same model as the encoder.
- It is even possible to use both models on the same vector, switching from
- from CM to RL once
- \begin_inset Formula $K^{\left(n\right)}$
- \end_inset
- becomes smaller than an encoder-decoder-agreed threshold
- \begin_inset Formula $K_{T}$
- \end_inset
- .
- \end_layout
- \begin_layout Section
- Coding K
- \end_layout
- \begin_layout Standard
- In some contexts, the value of
- \begin_inset Formula $K$
- \end_inset
- is agreed on between the encoder and decoder.
- If not, then we need to code
- \begin_inset Formula $K$
- \end_inset
- explicitly.
- In the proposed implementation, the pdf of
- \begin_inset Formula $K$
- \end_inset
- is adapted based on the data for
- \begin_inset Formula $K<15$
- \end_inset
- .
- For
- \begin_inset Formula $K\geq15$
- \end_inset
- , an exponentially-decaying distribution is assumed.
- The model is also conditioned on the expected value of
- \begin_inset Formula $K$
- \end_inset
- .
- \end_layout
- \begin_layout Bibliography
- \begin_inset CommandInset bibitem
- LatexCommand bibitem
- key "PVQ-draft"
- \end_inset
- J.-M.
- Valin,
- \begin_inset Quotes eld
- \end_inset
- Pyramid Vector Quantization for Video Coding
- \begin_inset Quotes erd
- \end_inset
- , IETF draft, 2013.
- http://tools.ietf.org/html/draft-valin-videocodec-pvq-00
- \end_layout
- \begin_layout Bibliography
- \begin_inset CommandInset bibitem
- LatexCommand bibitem
- key "Fischer"
- \end_inset
- T.
- R.
- Fischer,
- \begin_inset Quotes eld
- \end_inset
- A Pyramid Vector Quantizer
- \begin_inset Quotes erd
- \end_inset
- , in
- \emph on
- IEEE Trans.
- on Information Theory
- \emph default
- , Vol.
- 32, 1986, pp.
- 568-583.
- \end_layout
- \begin_layout Bibliography
- \begin_inset CommandInset bibitem
- LatexCommand bibitem
- key "FPC"
- \end_inset
- J.
- P.
- Ashley, E.
- M.
- Cruz-Zeno, U.
- Mittal, and W.
- Peng,
- \begin_inset Quotes eld
- \end_inset
- Wideband Coding of Speech Using a Scalable Pulse Codebook,
- \begin_inset Quotes erd
- \end_inset
- in
- \emph on
- Proc.
- IEEE Workshop on Speech Coding
- \emph default
- , 2000, pp.
- 148–15.
- \end_layout
- \begin_layout Bibliography
- \begin_inset CommandInset bibitem
- LatexCommand bibitem
- key "RFC6716"
- \end_inset
- Valin, J.-M., Vos.
- K., Terriberry, T.B.,
- \begin_inset Quotes eld
- \end_inset
- Definition of the Opus codec
- \begin_inset Quotes erd
- \end_inset
- , RFC 6716, Internet Engineering Task Force, 2012.
- \end_layout
- \end_body
- \end_document
|