xiph
/
daala
зеркало из https://git.xiph.org/daala.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
							#LyX 2.1 created this file. For more info see http://www.lyx.org/
\lyxformat 474
\begin_document
\begin_header
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman default
\font_sans default
\font_typewriter default
\font_math auto
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\spacing onehalf
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 1
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Title
Probability Modelling of Intra Prediction Modes
\end_layout

\begin_layout Author
Jean-Marc Valin
\end_layout

\begin_layout Section
Introduction
\end_layout

\begin_layout Standard
Modern video codecs and still image codecs make use of intra prediction
 where a region (or block) of the image is predicted based on its surrounding.
 There are usually multiple intra predictor 
\emph on
modes
\emph default
, each preforming a different kind of prediction.
 For example, some modes may predict along a particular direction, in which
 case the selected mode would typically represent the direction of the patterns
 in the region being coded.
 The mode is typically selected by the encoder and transmitted to the decoder.
 The cost of coding the mode can be large for small block sizes, so it is
 important to efficiently encode the information using entropy coding
\begin_inset space ~
\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "MW98,SM98"

\end_inset

.
 The following describes an efficient way of modelling mode probabilities.
\end_layout

\begin_layout Section
Probability Modelling
\end_layout

\begin_layout Standard
Let 
\begin_inset Formula $m_{i,j}$
\end_inset

 be the id of the intra prediction mode selected for block 
\begin_inset Formula $\left(i,j\right)$
\end_inset

.
 It is useful to know the probability 
\begin_inset Formula $p\left(m_{i,j}\right)$
\end_inset

 to make optimal use of entropy coding when encoding the selected mode in
 the bitstream.
 In particular, coding can be made more efficient by making use of the context:
 modes selected for causal neighbors that are already encoded in the bitstream.
 For example, it is desirable to estimate 
\begin_inset Formula $p\left(m_{i,j}\left|m_{i-1,j-1},m_{i,j-1},m_{i-1,j}\right.\right)$
\end_inset

.
 Let 
\begin_inset Formula $M$
\end_inset

 be the number of possible modes and 
\begin_inset Formula $N$
\end_inset

 be the number of neighbors considered, then explicit modelling of the condition
al probabilities using an explicit table requires 
\begin_inset Formula $M^{\left(N+1\right)}$
\end_inset

 entries, which rapidly becomes prohibitive.
 For example, with 10 modes and using the left, up-left and up blocks as
 context requires a table with 10,000 entries.
 This is one reason why the VP8 codec only uses the left and up blocks as
 context.
 
\end_layout

\begin_layout Standard
It is possible to reduce the size of the context by considering only whether
 the neighboring blocks use the same mode or a different mode, i.e.
 modelling the probability 
\begin_inset Formula $p\left(m_{i,j}\left|m_{i-1,j-1}=m_{i,j},m_{i,j-1}=m_{i,j},m_{i-1,j}=m_{i,j}\right.\right)$
\end_inset

 instead of 
\begin_inset Formula $p\left(m_{i,j}\left|m_{i-1,j-1},m_{i,j-1},m_{i-1,j}\right.\right)$
\end_inset

.
 Because the conditional parameters are now binary (equal or not equal),
 then a lookup table only requires 
\begin_inset Formula $M\cdot2^{N}$
\end_inset

 entries, e.g., only 80
\begin_inset space ~
\end_inset

entries when using 3
\begin_inset space ~
\end_inset

neighbours.
 One drawback of this approach is that
\begin_inset Formula 
\begin{equation}
S_{p}=\sum_{k=0}^{M}p\left(m_{i,j}=k\left|m_{i-1,j-1}=k,m_{i,j-1}=k,m_{i-1,j}=k\right.\right)\neq1
\end{equation}

\end_inset

so some form of renormalization is required.
\end_layout

\begin_layout Standard
The simplest way to renormalize probabilities is to multiply each of them
 by the same factor such that the sum equals 1.
 If the entropy coder, such as a classic non-binary arithmetic coder or
 a range coder, is set up to code symbols using cumulative frequency counts,
 then simply declaring 
\begin_inset Formula $S_{p}$
\end_inset

 to be the 
\emph on
total frequency count
\emph default
 allows the entropy coder to perform the renormalization process automatically
 as part of the coding process.
 This method is the simplest, but sometimes does not accurately model probabilit
ies when one of the probabilities is close to unity.
 
\end_layout

\begin_layout Standard
A slightly more bit-efficient renormalization procedure is to first 
\begin_inset Quotes eld
\end_inset

amplify
\begin_inset Quotes erd
\end_inset

 probabilities that are close to unity:
\begin_inset Formula 
\begin{equation}
p_{k}^{'}=p_{k}\cdot\frac{\left(\sum_{j}p_{j}\right)-p_{k}}{1-p_{k}}\,,\label{eq:weighted-renorm}
\end{equation}

\end_inset

followed by renormalizing the 
\begin_inset Formula $p_{k}^{'}$
\end_inset

 to have a sum of 1.
 The step in 
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:weighted-renorm"

\end_inset

 results in probabilities close to 1 being less affected by the renormalization
 and slightly improves coding efficiency over the simple renormalization.
\end_layout

\begin_layout Subsection
Adaptation
\end_layout

\begin_layout Standard
There are two possible ways of adapting the probability model to individual
 images being coded.
 The first is to compute an online probability 
\begin_inset Formula $p_{0}\left(m\right)$
\end_inset

 of mode 
\begin_inset Formula $m$
\end_inset

 being selected when none of the neighbours use mode 
\begin_inset Formula $m$
\end_inset

.
 Then, 
\begin_inset Formula $p_{0}\left(m\right)$
\end_inset

 is used as a floor probability for each mode 
\begin_inset Formula $m$
\end_inset

.
 The purpose of this floor is to ensure that when a particular mode is highly
 used in an image, its cost goes down.
\end_layout

\begin_layout Standard
The second way of adapting the probability model is to compute a statistic
 on a large number of previously encoded modes.
 For example, this can be the most often used mode in the frame, the percentage
 of non-directional modes used, the most common direction.
 This statistic can be used as an additional condition on the probability.
 In that case, the storage requirement becomes 
\begin_inset Formula $M\cdot2^{N}S$
\end_inset

 where 
\begin_inset Formula $S$
\end_inset

 is the number of possible discrete value for the statistic.
 
\end_layout

\begin_layout Bibliography
\begin_inset CommandInset bibitem
LatexCommand bibitem
key "MW98"

\end_inset

Moffat, A., Witten, I.H., 
\begin_inset Quotes eld
\end_inset

Arithmetic coding revisited
\begin_inset Quotes erd
\end_inset

, 
\emph on
ACM Transactions on Information Systems (TOIS)
\emph default
, Vol.
 16, Issue 3, pp.
 256-294 , July 1998.
\end_layout

\begin_layout Bibliography
\begin_inset CommandInset bibitem
LatexCommand bibitem
key "SM98"

\end_inset

Stuiver, L.
 and Moffat, A., "Piecewise Integer Mapping for Arithmetic Coding", 
\emph on
Proc.
 of the 17th IEEE Data Compression Conference (DCC)
\emph default
, pp.
 1-10, March/April 1998.
\end_layout

\end_body
\end_document