xiph
/
daala
mirror de https://git.xiph.org/daala.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
							#LyX 2.1 created this file. For more info see http://www.lyx.org/
\lyxformat 474
\begin_document
\begin_header
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman default
\font_sans default
\font_typewriter default
\font_math auto
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry true
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 1
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 3cm
\topmargin 3cm
\rightmargin 3cm
\bottommargin 3cm
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Title
Jmspeex' Journal of Dubious Theoretical Results
\end_layout

\begin_layout Abstract
This is a log of theoretical calculations and approximations that are used
 in some of the Daala code.
 Some approximations are likely to be too coarse, some assumptions may not
 correspond to the observable universe and some calculations may just be
 plain wrong.
 You have been warned.
\end_layout

\begin_layout Part
Relationship Between 
\begin_inset Formula $\lambda$
\end_inset

 and 
\begin_inset Formula $Q$
\end_inset

 in RDO
\end_layout

\begin_layout Standard
When using a high-rate scalar quantizer, the distortion is given by
\begin_inset Formula 
\[
D=\frac{Q^{2}}{12}\ ,
\]

\end_inset

where 
\begin_inset Formula $Q$
\end_inset

 is the quantizer's interval between two levels (not the maximum error like
 in some other work).
 The rate required to code the quantized values (assuming round-to-nearest)
 is
\begin_inset Formula 
\[
R=-\log_{2}Q+C
\]

\end_inset

where 
\begin_inset Formula $C$
\end_inset

 is a constant that does not depend on Q.
 Starting from a known 
\begin_inset Formula $\lambda$
\end_inset

 we want to find the quantization interval 
\begin_inset Formula $Q$
\end_inset

 that minimizes the rate-distortion curve, so
\begin_inset Formula 
\begin{align*}
\frac{\partial}{\partial Q}\left(D+\lambda R\right) & =0\\
\frac{\partial}{\partial Q}\left(\frac{Q^{2}}{12}-\lambda\log_{2}Q-\lambda C\right) & =0\\
\frac{Q}{6}-\frac{\lambda}{Q\log2} & =0\\
Q & =\sqrt{\frac{6\lambda}{\log2}}
\end{align*}

\end_inset

Or, if 
\begin_inset Formula $Q$
\end_inset

 is known, then
\begin_inset Formula 
\[
\lambda=\frac{Q^{2}\log2}{6}
\]

\end_inset


\end_layout

\begin_layout Section*
Quantization threshold
\end_layout

\begin_layout Standard
When we have a value between 0 and 1 and consider whether to round up or
 down, we can compute the optimal decision threshold 
\begin_inset Formula $x$
\end_inset

 for which the RD cost for the decision is equal
\begin_inset Formula 
\[
x^{2}+\lambda R_{0}=\left(1-x\right)^{2}+\lambda R_{1}\ ,
\]

\end_inset

where 
\begin_inset Formula $R_{0}$
\end_inset

 and 
\begin_inset Formula $R_{1}$
\end_inset

 are the costs for coding a zero and a one, respectively.
 Solving for 
\begin_inset Formula $x$
\end_inset

 we have
\begin_inset Formula 
\begin{align*}
x^{2}+\lambda R_{0} & =\left(1-x\right)^{2}+\lambda R_{1}\\
x^{2}+\lambda R_{0} & =x^{2}-2x+1+\lambda R_{1}\\
2x & =1+\lambda\left(R_{1}-R_{0}\right)\\
x & =\frac{1}{2}+\frac{\lambda\Delta R}{2}\ ,
\end{align*}

\end_inset

where 
\begin_inset Formula $\Delta R=R_{1}-R_{0}$
\end_inset

.
 In other words, it's like round-to-nearest, but with an additional bias
 of 
\begin_inset Formula $\lambda\Delta R/2$
\end_inset

 towards zero.
 
\end_layout

\begin_layout Part
Rate-Distortion Analysis of a Quantized Laplace Distribution
\end_layout

\begin_layout Standard
Here we assume that the quantization step size has already been taken into
 account and that 
\begin_inset Formula $\sigma$
\end_inset

 is the normalized standard deviation of a DCT coefficient.
 The post-quantization distribution of a Laplace-distributed variable with
 non-zero quantization threshold 
\begin_inset Formula $\theta$
\end_inset

 is:
\begin_inset Formula 
\[
p\left(n\right)=\begin{cases}
1-r^{\theta} & n=0\\
r^{\theta}\left(1-r\right)r^{n-1} & n>0
\end{cases}
\]

\end_inset

where 
\begin_inset Formula $\theta=\frac{1}{2}$
\end_inset

 for round-to-nearest and 
\begin_inset Formula $r=e^{-\sqrt{2}/\sigma}$
\end_inset

.
 The entropy (rate) 
\begin_inset Formula $R$
\end_inset

 of the quantized Laplace distribution is:
\begin_inset Formula 
\[
R=\overset{\mathrm{sign}}{\overbrace{r^{\theta}}}+\overset{\mathrm{non-zero}}{\overbrace{H\left(r^{\theta}\right)}}+\overset{\mathrm{tail}}{\overbrace{\frac{r^{\theta}H\left(r\right)}{1-r}}}
\]

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Formula 
\begin{align*}
D(r) & =-\log r\left(\int_{0}^{\theta}x^{2}r^{x}dx+\sum_{k=1}^{\infty}\int_{\theta-1}^{\theta}x^{2}r^{k}r^{x}dx\right)\\
 & =I\left(r,\theta\right)-I\left(r,0\right)+\frac{r}{1-r}\left(I\left(r,\theta\right)-I\left(r,\theta-1\right)\right)
\end{align*}

\end_inset

where
\begin_inset Formula 
\begin{align*}
I\left(r,x\right) & =-\log r\int x^{2}r^{x}dx\\
 & =r^{x}\frac{2x\log r-x^{2}\log^{2}r-2}{\log^{2}r}+C
\end{align*}

\end_inset


\end_layout

\begin_layout Standard
When 
\begin_inset Formula $\sigma$
\end_inset

 is much smaller than the quantization step size (everything quantizes to
 zero), then the distortion is simply 
\begin_inset Formula $\sigma^{2}$
\end_inset

 and when 
\begin_inset Formula $\sigma$
\end_inset

 is very large (flat distribution), then the distortion is that of a scalar
 quantizer: 
\begin_inset Formula $1/12$
\end_inset

.
 So in the general case we can approximate with
\begin_inset Formula 
\[
D=\min\left(\sigma^{2},\frac{1}{12}\right)
\]

\end_inset

which tends to overestimate 
\begin_inset Formula $D$
\end_inset

 in the region where 
\begin_inset Formula $\sigma^{2}$
\end_inset

 is close to 
\begin_inset Formula $1/12$
\end_inset

.
 Assuming high-rate RDO, we have 
\begin_inset Formula 
\[
\lambda=\frac{Q^{2}\log(2)}{6}=\frac{\log(2)}{6}\ .
\]

\end_inset

The total RD-cost (expressed as a rate) becomes
\begin_inset Formula 
\[
R+\frac{D}{\lambda}=r^{\theta}+H\left(r^{\theta}\right)+\frac{r^{\theta}H\left(r\right)}{1-r}+\min\left(\frac{6\sigma^{2}}{\log(2)},\frac{1}{2\log(2)}\right)
\]

\end_inset

This cost function can be approximated by the (smoother) cost function
\begin_inset Formula 
\[
C=\frac{1}{2}\log_{2}\left(1+\left(6.33\sigma\right)^{2}\right)
\]

\end_inset


\end_layout

\begin_layout Part
PVQ Distortion
\end_layout

\begin_layout Standard
Let 
\begin_inset Formula $\mathbf{X}=g\mathbf{z}$
\end_inset

 and 
\begin_inset Formula $\hat{\mathbf{X}}=\hat{g}\hat{\mathbf{z}}$
\end_inset

 be the unquantized and quantized coefficient vector, respectively.
 The quantization distortion is 
\begin_inset Formula 
\begin{align}
D & =\left(\mathbf{X}-\hat{\mathbf{X}}\right)^{T}\left(\mathbf{X}-\hat{\mathbf{X}}\right)\nonumber \\
 & =\mathbf{X}^{T}\mathbf{X}+\hat{\mathbf{X}}^{T}\hat{\mathbf{X}}-2\hat{\mathbf{X}}^{T}\mathbf{X}\nonumber \\
 & =g^{2}+\hat{g}^{2}-2g\hat{g}\mathbf{z}\hat{\mathbf{z}}\nonumber \\
 & =\left(g-\hat{g}\right)^{2}+2g\hat{g}-2g\hat{g}\mathbf{z}\hat{\mathbf{z}}\nonumber \\
 & =\left(g-\hat{g}\right)^{2}+g\hat{g}\left(2-2\mathbf{z}\hat{\mathbf{z}}\right)\ .\label{eq:distance_v1}
\end{align}

\end_inset

Let 
\begin_inset Formula $D_{z}$
\end_inset

 be the distance between 
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\uuline off
\uwave off
\noun off
\color none

\begin_inset Formula $\mathbf{z}$
\end_inset

 and 
\begin_inset Formula $\hat{\mathbf{z}}$
\end_inset

,
\begin_inset Formula 
\begin{align}
D_{z} & =\left(\mathbf{z}-\hat{\mathbf{z}}\right)^{T}\left(\mathbf{z}-\hat{\mathbf{z}}\right)\nonumber \\
 & =2-2\mathbf{z}^{T}\hat{\mathbf{z}}\ .\label{eq:distance-Dz}
\end{align}

\end_inset

We can then rewrite 
\begin_inset CommandInset ref
LatexCommand eqref
reference "eq:distance_v1"

\end_inset

 as
\begin_inset Formula 
\begin{equation}
D=\left(g-\hat{g}\right)^{2}+g\hat{g}D_{z}\ ,\label{eq:pvq_distortion}
\end{equation}

\end_inset

which separates the gain quantization from the quantization of the unit
 vector 
\begin_inset Formula $\mathbf{z}$
\end_inset

.
\end_layout

\begin_layout Part
Distortion from theta PVQ
\end_layout

\begin_layout Standard
Let the normalized theta-PVQ vector be 
\begin_inset Formula 
\[
\mathbf{z}=\left[\begin{array}{c}
\cos\theta\\
\mathbf{x}\sin\theta
\end{array}\right]\ ,
\]

\end_inset

where 
\begin_inset Formula $\mathbf{x}$
\end_inset

 is the unit-vector coded with the PVQ quantizer, the distortion 
\begin_inset Formula $D$
\end_inset

 between the unquantized 
\begin_inset Formula $\mathbf{z}$
\end_inset

 and its quantized version 
\begin_inset Formula $\hat{\mathbf{z}}$
\end_inset

 is
\begin_inset Formula 
\begin{align}
D_{z} & =\left(\mathbf{z}-\hat{\mathbf{z}}\right)^{T}\left(\mathbf{z}-\hat{\mathbf{z}}\right)\nonumber \\
 & =\left(\cos\theta-\cos\hat{\theta}\right)^{2}+\left(\mathbf{x}\sin\theta-\hat{\mathbf{x}}\sin\hat{\theta}\right)^{T}\left(\mathbf{x}\sin\theta-\hat{\mathbf{x}}\sin\hat{\theta}\right)\nonumber \\
 & =\cos^{2}\theta-2\cos\theta\cos\hat{\theta}+\cos^{2}\hat{\theta}+\sin^{2}\theta+\sin^{2}\hat{\theta}-2\sin\theta\sin\hat{\theta}\mathbf{x}^{T}\hat{\mathbf{x}}\nonumber \\
 & =2-2\cos\theta\cos\hat{\theta}-2\sin\theta\sin\hat{\theta}\mathbf{x}^{T}\hat{\mathbf{x}}\ .\label{eq:distortion-1}
\end{align}

\end_inset


\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\uuline off
\uwave off
\noun off
\color none
Using the identity 
\begin_inset CommandInset ref
LatexCommand eqref
reference "eq:distance-Dz"

\end_inset

, we can then rewrite 
\begin_inset CommandInset ref
LatexCommand eqref
reference "eq:distortion-1"

\end_inset

 as
\begin_inset Formula 
\begin{align}
D_{z} & =2-2\cos\theta\cos\hat{\theta}-\sin\theta\sin\hat{\theta}\left(2-D_{x}\right)\nonumber \\
 & =2-2\cos\left(\theta-\hat{\theta}\right)+\sin\theta\sin\hat{\theta}D_{x}\nonumber \\
 & =D_{\theta}+\sin\theta\sin\hat{\theta}D_{x}\ ,\label{eq:distortion-final}
\end{align}

\end_inset

where 
\begin_inset Formula $D_{\theta}=2-2\cos\left(\theta-\hat{\theta}\right)$
\end_inset

 is the mean square error due to quantizing 
\begin_inset Formula $\theta$
\end_inset

.
 So essentially, the total error is the sum of the error due to quantization
 of 
\begin_inset Formula $\theta$
\end_inset

 and the error in the PVQ quantization assuming a radius that's the geometric
 mean of the quantized and unquantized radius.
\end_layout

\begin_layout Standard
Putting 
\begin_inset CommandInset ref
LatexCommand eqref
reference "eq:distortion-final"

\end_inset

 into 
\begin_inset CommandInset ref
LatexCommand eqref
reference "eq:pvq_distortion"

\end_inset

, we obtain
\begin_inset Formula 
\begin{equation}
D=\left(g-\hat{g}\right)^{2}+g\hat{g}\left(D_{\theta}+\sin\theta\sin\hat{\theta}D_{x}\right)\ .\label{eq:total_pvq_theta_dist}
\end{equation}

\end_inset


\end_layout

\begin_layout Part
Biorthogonality and quantization noise
\end_layout

\begin_layout Standard
A biorthogonal transform defined as 
\begin_inset Formula 
\[
\mathbf{H}\mathbf{G}^{T}=I
\]

\end_inset

where the columns of 
\begin_inset Formula $\mathbf{G}$
\end_inset

 are the analysis basis functions and the columns of 
\begin_inset Formula $\mathbf{H}$
\end_inset

 are the columns of the synthesis basis functions.
 We define the diagonal matrix 
\begin_inset Formula $\mathbf{S}$
\end_inset

 such that the diagonal elements 
\begin_inset Formula $s_{i,i}=\sqrt{\mathbf{h}_{i}^{T}\mathbf{h}_{i}}$
\end_inset

 are the magnitudes of the synthesis basis functions.
 For an input vector 
\begin_inset Formula $\mathbf{x}$
\end_inset

, the quantization process can be modeled as adding an uncorrelated noise
 vector 
\begin_inset Formula $\mathbf{n}$
\end_inset

 such that the reconstruction 
\begin_inset Formula $\mathbf{y}$
\end_inset

 is
\begin_inset Formula 
\begin{align*}
\mathbf{y} & =\mathbf{H}\mathbf{S}^{-1}\left(\mathbf{S}\mathbf{G}^{T}\mathbf{x}+\mathbf{n}\right)\\
 & =\mathbf{x}+\mathbf{H}\mathbf{S}^{-1}\mathbf{n}
\end{align*}

\end_inset


\end_layout

\begin_layout Standard
The distortion due to quantization is 
\begin_inset Formula 
\begin{align*}
D & =\left(\mathbf{x}-\mathbf{y}\right)^{T}\left(\mathbf{x}-\mathbf{y}\right)\\
 & =\left(\mathbf{H}\mathbf{S}^{-1}\mathbf{n}\right)^{T}\mathbf{H}\mathbf{S}^{-1}\mathbf{n}\\
 & =\mathbf{n}^{T}\left(\mathbf{S}^{T}\right)^{-1}\mathbf{H}^{T}\mathbf{H}\mathbf{S}^{-1}\mathbf{n}\\
 & =\mathbf{n}^{T}\mathbf{R}\mathbf{n}
\end{align*}

\end_inset

where 
\begin_inset Formula $\mathbf{R}=\left(\mathbf{S}^{-1}\right)^{T}\mathbf{H}^{T}\mathbf{H}\mathbf{S}^{-1}$
\end_inset

.
 Rewriting 
\begin_inset Formula $D$
\end_inset

 as a summation, we have 
\begin_inset Formula 
\[
D=\sum_{i}\sum_{j}r_{i,i}n_{i}n_{j}
\]

\end_inset

This is different from the orthonormal case where
\begin_inset Formula 
\[
D=\sum_{i}n_{i}^{2}
\]

\end_inset

due to 
\begin_inset Formula $\mathbf{R}$
\end_inset

 being the identity matrix (also known as Parseval's theorem).
 
\end_layout

\begin_layout Standard
Since the noise is uncorrelated and since the diagonal of 
\begin_inset Formula $\mathbf{R}$
\end_inset

 is equal to 1, then the expectation of the noise power is 
\begin_inset Formula $E\{D\}=E\{\mathbf{n}^{T}\mathbf{n}\}$
\end_inset

, like in the orthonormal case.
 This shows that multiplying each transformed coefficient 
\begin_inset Formula $x_{i}$
\end_inset

 by the magnitude of the corresponding synthesis function 
\begin_inset Formula $s_{i,i}$
\end_inset

 prior to quantization is sufficient to obtain the same 
\emph on
average
\emph default
 noise behaviour.
 In practice, it 
\emph on
may
\emph default
 be possible to do some clever trick in the quantization search to obtain
 a smaller distortion than the orthonormal case, partially compensating
 for the entropy cost of the biorthogonal transform.
\end_layout

\begin_layout Standard
The average distortion we find also supports the use of the squared magnitude
 of the synthesis basis function in the computation of the coding gain.
\end_layout

\begin_layout Part
Temporal RDO
\end_layout

\begin_layout Standard
Let's assume two sequences of 
\begin_inset Formula $N$
\end_inset

 samples each being quantized with a resolution 
\begin_inset Formula $Q$
\end_inset

.
 One sequence is constant, while the other isn't.
 The total distortion will be
\begin_inset Formula 
\[
D=\frac{2NQ^{2}}{12}\ .
\]

\end_inset


\end_layout

\begin_layout Standard
If we assume that we can skip encoding of the constant sequence at no cost
 and shift 
\begin_inset Formula $b$
\end_inset

 bits away from the variable sequence to the constant one, it costs only
 
\begin_inset Formula $b/N$
\end_inset

 bits per sample on the variable sequence and we get a distortion
\begin_inset Formula 
\begin{align*}
D & =N\frac{\left(2^{-b}Q\right)^{2}}{12}+N\frac{\left(2^{b/N}Q\right)^{2}}{12}\\
 & =\frac{NQ^{2}}{12}\left(2^{-2b}+2^{2b/N}\right)
\end{align*}

\end_inset

We solve for 
\begin_inset Formula $\partial D/\partial b=0$
\end_inset

 to minimize distortion:
\begin_inset Formula 
\begin{gather*}
\frac{\partial D}{\partial b}=\frac{NQ^{2}}{12}\left(-2b\log22^{-2b}+\frac{2b\log2}{N}2^{2b/N}\right)=0\\
\frac{2b2^{2b/N}\log2}{N}=2b2^{-2b}\log2\\
\frac{2^{2b/N}}{N}=2^{-2b}
\end{gather*}

\end_inset

Taking the base-2 log on both side:
\begin_inset Formula 
\begin{align*}
2b/N-\log_{2}N & =-2b\\
\frac{2b\left(N+1\right)}{N} & =\log_{2}N\\
b & =\frac{N\log_{2}N}{2\left(N+1\right)}
\end{align*}

\end_inset


\end_layout

\begin_layout Section*
Slowly varying sequence
\end_layout

\begin_layout Standard
Let's see what happens with a slowly varying sequence rather than a constant
 one...
\end_layout

\end_body
\end_document