video_pvq.lyx 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859
  1. #LyX 2.0 created this file. For more info see http://www.lyx.org/
  2. \lyxformat 413
  3. \begin_document
  4. \begin_header
  5. \textclass article
  6. \use_default_options true
  7. \maintain_unincluded_children false
  8. \language english
  9. \language_package default
  10. \inputencoding auto
  11. \fontencoding global
  12. \font_roman default
  13. \font_sans default
  14. \font_typewriter default
  15. \font_default_family default
  16. \use_non_tex_fonts false
  17. \font_sc false
  18. \font_osf false
  19. \font_sf_scale 100
  20. \font_tt_scale 100
  21. \graphics default
  22. \default_output_format default
  23. \output_sync 0
  24. \bibtex_command default
  25. \index_command default
  26. \paperfontsize default
  27. \use_hyperref false
  28. \papersize default
  29. \use_geometry false
  30. \use_amsmath 1
  31. \use_esint 1
  32. \use_mhchem 1
  33. \use_mathdots 1
  34. \cite_engine basic
  35. \use_bibtopic false
  36. \use_indices false
  37. \paperorientation portrait
  38. \suppress_date false
  39. \use_refstyle 1
  40. \index Index
  41. \shortcut idx
  42. \color #008000
  43. \end_index
  44. \secnumdepth 3
  45. \tocdepth 3
  46. \paragraph_separation indent
  47. \paragraph_indentation default
  48. \quotes_language english
  49. \papercolumns 1
  50. \papersides 1
  51. \paperpagestyle default
  52. \tracking_changes false
  53. \output_changes false
  54. \html_math_output 0
  55. \html_css_as_file 0
  56. \html_be_strict false
  57. \end_header
  58. \begin_body
  59. \begin_layout Title
  60. Energy Preservation in PVQ-Based Video Coding
  61. \end_layout
  62. \begin_layout Author
  63. Jean-Marc Valin
  64. \end_layout
  65. \begin_layout Section
  66. Introduction
  67. \end_layout
  68. \begin_layout Standard
  69. This mini-paper describes a proposal for adapting the CELT energy conservation
  70. principle to video coding based on a pyramid vector quantizer (PVQ).
  71. One potential advantage of conserving energy of the AC coefficients in
  72. video coding is preserving textures rather than low-passing them.
  73. Also, by introducing a fixed-resolution PVQ-type quantizer, we automatically
  74. gain a simple activity masking model.
  75. \end_layout
  76. \begin_layout Standard
  77. The main challenge of adapting this scheme to video is that we have a good
  78. prediction (the reference frame), so we are essentially starting from a
  79. point that is already on the PVQ hyper-sphere, rather than at the origin
  80. like in CELT.
  81. Other challenges are the introduction of a quantization matrix and the
  82. fact that we want the reference (motion predicted) data to perfectly correspond
  83. to one of the entries in our codebook.
  84. \end_layout
  85. \begin_layout Section
  86. Encoder
  87. \end_layout
  88. \begin_layout Standard
  89. Let vector
  90. \begin_inset Formula $\mathbf{x}_{d}$
  91. \end_inset
  92. denote the (pre-normalization) DCT band to be coded in the current block
  93. and vector
  94. \begin_inset Formula $\mathbf{r}_{d}$
  95. \end_inset
  96. denote the corresponding reference after motion compensation, the encoder
  97. computes and encodes the
  98. \begin_inset Quotes eld
  99. \end_inset
  100. band gain
  101. \begin_inset Quotes erd
  102. \end_inset
  103. \begin_inset Formula
  104. \begin{equation}
  105. g=\sqrt{\mathbf{x}_{d}^{T}\mathbf{x}_{d}}\,.\label{eq:band-energy}
  106. \end{equation}
  107. \end_inset
  108. \end_layout
  109. \begin_layout Standard
  110. Let
  111. \begin_inset Formula $\mathbf{Q}$
  112. \end_inset
  113. be a diagonal matrix containing the quantization step size for each element
  114. of
  115. \begin_inset Formula $\mathbf{x}_{d}$
  116. \end_inset
  117. , the normalized band is computed as
  118. \begin_inset Formula
  119. \begin{equation}
  120. \mathbf{x}=\frac{\mathbf{Q}^{-1}\mathbf{x}_{d}}{\left\Vert \mathbf{Q}^{-1}\mathbf{x}_{d}\right\Vert }\,,\label{eq:normalized-x}
  121. \end{equation}
  122. \end_inset
  123. with the normalized reference
  124. \begin_inset Formula $\mathbf{r}$
  125. \end_inset
  126. similarly computed based on
  127. \begin_inset Formula $\mathbf{r}_{d}$
  128. \end_inset
  129. .
  130. The encoder then finds the position and sign of the maximum value in
  131. \begin_inset Formula $\mathbf{r}$
  132. \end_inset
  133. \begin_inset Formula
  134. \begin{align}
  135. m & =\underset{i}{\mathrm{argmax}}\left|r_{i}\right|\label{eq:reflection-argmax}\\
  136. s & =\mathrm{sgn}\left(r_{m}\right)\label{eq:reflection-sign}
  137. \end{align}
  138. \end_inset
  139. and computes the Householder reflection that reflects
  140. \begin_inset Formula $\mathbf{r}$
  141. \end_inset
  142. to
  143. \begin_inset Formula $-s\mathbf{e}_{m}$
  144. \end_inset
  145. .
  146. The reflection vector is given by
  147. \begin_inset Formula
  148. \begin{equation}
  149. \mathbf{v}=\mathbf{r}+s\mathbf{e}_{m}\,.\label{eq:reflection-vector}
  150. \end{equation}
  151. \end_inset
  152. The encoder reflects the normalized band to find
  153. \begin_inset Formula
  154. \begin{equation}
  155. \mathbf{z}=\mathbf{x}-\frac{2}{\mathbf{v}^{T}\mathbf{v}}\mathbf{v}\left(\mathbf{v}^{T}\mathbf{x}\right)\,.\label{eq:reflection}
  156. \end{equation}
  157. \end_inset
  158. \end_layout
  159. \begin_layout Standard
  160. The similarity between the current band and the reference band is represented
  161. by the angle (assuming no quantization)
  162. \end_layout
  163. \begin_layout Standard
  164. \begin_inset Formula
  165. \begin{equation}
  166. \theta=\arccos\frac{-sz_{m}}{\left\Vert \mathbf{z}\right\Vert }\ .\label{eq:unquant-theta}
  167. \end{equation}
  168. \end_inset
  169. Let
  170. \begin_inset Formula $N$
  171. \end_inset
  172. be the number of dimensions in
  173. \begin_inset Formula $\mathbf{x}$
  174. \end_inset
  175. and
  176. \begin_inset Formula $K$
  177. \end_inset
  178. be the number of pulses in our codebooks, we search for the codebook entry
  179. \begin_inset Formula
  180. \begin{equation}
  181. q=\underset{i}{\mathrm{argmax}}\frac{\mathbf{p}_{i}^{T}\left(\mathbf{z}+sz_{m}\mathbf{e}_{m}\right)}{\sqrt{\mathbf{p}_{i}^{T}\mathbf{p}_{i}}}\,,\label{eq:quantization}
  182. \end{equation}
  183. \end_inset
  184. where
  185. \begin_inset Formula $\mathbf{p}_{i}$
  186. \end_inset
  187. is the
  188. \begin_inset Formula $i^{th}$
  189. \end_inset
  190. combination of magnitudes and signs that satisfies
  191. \begin_inset Formula $\left\Vert \mathbf{p}_{i}\right\Vert _{L1}=K$
  192. \end_inset
  193. .
  194. Let
  195. \begin_inset Formula $\theta_{opt}$
  196. \end_inset
  197. be the post-quantization optimal angle, the mean square error becomes
  198. \begin_inset Formula
  199. \begin{equation}
  200. E=\left\Vert -s\cos\theta_{opt}\mathbf{e}_{m}+\sin\theta_{opt}\hat{\mathbf{p}}-\mathbf{z}\right\Vert ^{2}\ ,\label{eq:error-theta}
  201. \end{equation}
  202. \end_inset
  203. where
  204. \begin_inset Formula $\hat{\mathbf{p}}=\mathbf{p}_{q}/\sqrt{\mathbf{p}_{q}^{T}\mathbf{p}_{q}}$
  205. \end_inset
  206. Solving for
  207. \begin_inset Formula $\frac{\partial E}{\partial\theta}=0$
  208. \end_inset
  209. and knowing that
  210. \begin_inset Formula $\hat{\mathbf{p}}^{T}\mathbf{e}_{m}=0$
  211. \end_inset
  212. , we have
  213. \end_layout
  214. \begin_layout Standard
  215. \begin_inset Formula
  216. \begin{align}
  217. \frac{\partial}{\partial\theta}\left(-s\cos\theta_{opt}\mathbf{e}_{m}+\sin\theta_{opt}\hat{\mathbf{p}}\right)\mathbf{z} & =0\nonumber \\
  218. \left(s\sin\theta_{opt}\mathbf{e}_{m}+\cos\theta_{opt}\hat{\mathbf{p}}\right)\mathbf{z} & =0\nonumber \\
  219. \sin\theta_{opt}\mathbf{e}_{m}\mathbf{z} & =-s\cos\theta_{opt}\hat{\mathbf{p}}\mathbf{z}\nonumber \\
  220. \theta_{opt} & =-s\arctan\frac{\hat{\mathbf{p}}\mathbf{z}}{\mathbf{e}_{m}\mathbf{z}}\ .\label{eq:theta-opt}
  221. \end{align}
  222. \end_inset
  223. \end_layout
  224. \begin_layout Standard
  225. The resolution of the gain and angle, as well as the number of pulses should
  226. all be derived from a single quality parameter.
  227. The encoder transmits the gain
  228. \begin_inset Formula $g$
  229. \end_inset
  230. , the quantized angle
  231. \begin_inset Formula $\hat{\theta}$
  232. \end_inset
  233. , and the
  234. \begin_inset Formula $\mathbf{p}_{q}$
  235. \end_inset
  236. vector.
  237. Neither
  238. \begin_inset Formula $s$
  239. \end_inset
  240. nor
  241. \begin_inset Formula $m$
  242. \end_inset
  243. need to be transmitted since the decoder can derive them from the reference.
  244. Encoding
  245. \begin_inset Formula $\mathbf{p}_{q}$
  246. \end_inset
  247. should make use of the fact that
  248. \begin_inset Formula $K$
  249. \end_inset
  250. is known and is left as an exercise to the implementer.
  251. \end_layout
  252. \begin_layout Section
  253. Decoder
  254. \end_layout
  255. \begin_layout Standard
  256. The decoder starts by decoding the codebook entry
  257. \begin_inset Formula $\mathbf{p}_{q}$
  258. \end_inset
  259. and uses it to reconstruct the unit-norm reflected band as
  260. \begin_inset Formula
  261. \begin{equation}
  262. \hat{\mathbf{z}}=-s\cos\hat{\theta}\mathbf{e}_{m}+\sin\hat{\theta}\frac{\mathbf{p}_{q}}{\sqrt{\mathbf{p}_{q}^{T}\mathbf{p}_{q}}}\,.\label{eq:reconstruct}
  263. \end{equation}
  264. \end_inset
  265. Because the decoder has access to exactly the same reference as the encoder,
  266. it is able to apply
  267. \begin_inset CommandInset ref
  268. LatexCommand eqref
  269. reference "eq:reflection-argmax"
  270. \end_inset
  271. -
  272. \begin_inset CommandInset ref
  273. LatexCommand eqref
  274. reference "eq:reflection-vector"
  275. \end_inset
  276. to obtain the same
  277. \begin_inset Formula $\mathbf{v}$
  278. \end_inset
  279. as used in the encoder.
  280. The decoded normalized band is
  281. \begin_inset Formula
  282. \begin{equation}
  283. \hat{\mathbf{x}}=\hat{\mathbf{z}}-\frac{2}{\mathbf{v}^{T}\mathbf{v}}\mathbf{v}\left(\mathbf{v}^{T}\hat{\mathbf{z}}\right)\,.\label{eq:decoder-reflection}
  284. \end{equation}
  285. \end_inset
  286. \end_layout
  287. \begin_layout Standard
  288. The renormalized band is computed by taking into account the quantization
  289. resolution:
  290. \begin_inset Formula
  291. \begin{equation}
  292. \hat{\mathbf{x}}_{d}=\hat{g}\frac{\mathbf{Q}\hat{\mathbf{x}}}{\left\Vert \mathbf{Q}\hat{\mathbf{x}}\right\Vert }\,.\label{eq:decoded-band}
  293. \end{equation}
  294. \end_inset
  295. \end_layout
  296. \begin_layout Section
  297. Coding Resolution
  298. \end_layout
  299. \begin_layout Standard
  300. It is desirable for a single quality parameter to control
  301. \begin_inset Formula $K$
  302. \end_inset
  303. and the resolution of gain and angle.
  304. That quality parameter should also take into account activity masking to
  305. some extent.
  306. According to Jason Garrett-Glaser, x264's activity masking uses a resolution
  307. proportional to
  308. \begin_inset Formula $g^{2\alpha}$
  309. \end_inset
  310. , with
  311. \begin_inset Formula $\alpha=0.173$
  312. \end_inset
  313. .
  314. We can derive a scalar quantizer that follows this resolution:
  315. \begin_inset Formula
  316. \begin{equation}
  317. \hat{g}=Q_{g}\gamma^{1+2\alpha}\ ,\label{eq:gain-scalar-quantization}
  318. \end{equation}
  319. \end_inset
  320. where
  321. \begin_inset Formula $\gamma$
  322. \end_inset
  323. is the gain quantization index and
  324. \begin_inset Formula $Q_{g}$
  325. \end_inset
  326. is the gain resolution and
  327. \begin_inset Quotes eld
  328. \end_inset
  329. master
  330. \begin_inset Quotes erd
  331. \end_inset
  332. quality parameter.
  333. If we assume that MSE is a good criterion, then the angle quantization
  334. resolution should be (roughly)
  335. \begin_inset Formula
  336. \begin{equation}
  337. Q_{\theta}=\frac{d\hat{g}/d\gamma}{\hat{g}}=\frac{Q_{g}\left(1+2\alpha\right)\gamma^{2\alpha}}{Q_{g}\gamma^{1+2\alpha}}=\frac{\left(1+2\alpha\right)}{\gamma}\ .\label{eq:theta-quantization-step}
  338. \end{equation}
  339. \end_inset
  340. \end_layout
  341. \begin_layout Standard
  342. To derive the optimal
  343. \begin_inset Formula $K$
  344. \end_inset
  345. we need to consider the cosine distance between adjacent codevectors
  346. \begin_inset Formula $\mathbf{p}_{1}$
  347. \end_inset
  348. and
  349. \begin_inset Formula $\mathbf{p}_{2}$
  350. \end_inset
  351. for two cases:
  352. \begin_inset Formula $K<N$
  353. \end_inset
  354. and
  355. \begin_inset Formula $K>N$
  356. \end_inset
  357. .
  358. For
  359. \begin_inset Formula $K<N$
  360. \end_inset
  361. , the worst resolution occurs when no value in
  362. \begin_inset Formula $\mathbf{p}_{q}$
  363. \end_inset
  364. is larger than one.
  365. In that case, the two closest codevectors have a cosine distance
  366. \begin_inset Formula
  367. \begin{align*}
  368. \cos\tau & =\frac{\mathbf{p}_{1}^{T}\mathbf{p}_{2}}{\sqrt{\mathbf{p}_{1}^{T}\mathbf{p}_{1}}\sqrt{\mathbf{p}_{2}^{T}\mathbf{p}_{2}}}\\
  369. & =\frac{K-1}{K}\\
  370. & =1-\frac{1}{K}
  371. \end{align*}
  372. \end_inset
  373. By approximating the cosine, we then get
  374. \begin_inset Formula
  375. \begin{align}
  376. 1-\frac{1}{K} & =\cos\tau\approx1-\frac{\tau^{2}}{2}\nonumber \\
  377. K & \approx\frac{2}{\tau^{2}}\label{eq:small-K}
  378. \end{align}
  379. \end_inset
  380. For
  381. \begin_inset Formula $K>N$
  382. \end_inset
  383. the worst resolution happens when all values are equal to
  384. \begin_inset Formula $K/N$
  385. \end_inset
  386. in
  387. \begin_inset Formula $\mathbf{p}_{1}$
  388. \end_inset
  389. and
  390. \begin_inset Formula $\mathbf{p}_{2}$
  391. \end_inset
  392. differs from it by one pulse.
  393. In that case
  394. \begin_inset Formula
  395. \begin{align*}
  396. \cos\tau & =\frac{K^{2}/N}{\sqrt{\frac{K^{2}}{N}}\sqrt{\frac{K^{2}}{N}+2}}\\
  397. & =\frac{1}{\sqrt{1+\frac{2N}{K^{2}}}}\\
  398. & \approx1-\frac{N}{K^{2}}
  399. \end{align*}
  400. \end_inset
  401. By approximating the cosine, we get
  402. \begin_inset Formula
  403. \begin{align}
  404. 1-\frac{N}{K^{2}} & =\cos\tau\approx1-\frac{\tau^{2}}{2}\nonumber \\
  405. K & \approx\frac{\sqrt{2N}}{\tau}\ .\label{eq:large-K}
  406. \end{align}
  407. \end_inset
  408. \end_layout
  409. \begin_layout Standard
  410. By combining
  411. \begin_inset CommandInset ref
  412. LatexCommand eqref
  413. reference "eq:small-K"
  414. \end_inset
  415. with
  416. \begin_inset CommandInset ref
  417. LatexCommand eqref
  418. reference "eq:large-K"
  419. \end_inset
  420. , we have
  421. \begin_inset Formula
  422. \begin{equation}
  423. K\approx\min\left(\frac{\sqrt{2N}}{\tau},\frac{2}{\tau^{2}}\right)\label{eq:pulse-allocation}
  424. \end{equation}
  425. \end_inset
  426. The last step is to set
  427. \begin_inset Formula
  428. \begin{equation}
  429. \tau=Q_{\theta}/\sin\hat{\theta}\label{eq:tau-from-theta}
  430. \end{equation}
  431. \end_inset
  432. to account for the fact that the more the image differs from the reference,
  433. the higher the resolution needs to be.
  434. \end_layout
  435. \begin_layout Section
  436. Bi-Prediction
  437. \end_layout
  438. \begin_layout Standard
  439. We can use this scheme for bi-prediction by introducing a second
  440. \begin_inset Formula $\theta$
  441. \end_inset
  442. parameter.
  443. For the case of two (normalized) reference frames
  444. \begin_inset Formula $\mathbf{r}_{1}$
  445. \end_inset
  446. and
  447. \begin_inset Formula $\mathbf{r}_{2}$
  448. \end_inset
  449. , we introduce
  450. \begin_inset Formula $\mathbf{s}_{1}=\left(\mathbf{r}_{1}+\mathbf{r}_{2}\right)/2$
  451. \end_inset
  452. and
  453. \begin_inset Formula $\mathbf{s}_{2}=\left(\mathbf{r}_{1}-\mathbf{r}_{2}\right)/2$
  454. \end_inset
  455. .
  456. We start by using
  457. \begin_inset Formula $\mathbf{s}_{1}$
  458. \end_inset
  459. as a reference, apply the Householder reflection to both
  460. \begin_inset Formula $\mathbf{x}$
  461. \end_inset
  462. and
  463. \begin_inset Formula $\mathbf{s}_{2}$
  464. \end_inset
  465. , and evaluate
  466. \begin_inset Formula $\theta_{1}$
  467. \end_inset
  468. .
  469. From there, we derive a second Householder reflection from the reflected
  470. version of
  471. \begin_inset Formula $\mathbf{s}_{2}$
  472. \end_inset
  473. and apply it to
  474. \begin_inset Formula $\mathbf{x}_{r}$
  475. \end_inset
  476. .
  477. The result is that the
  478. \begin_inset Formula $\theta_{2}$
  479. \end_inset
  480. parameter controls how the current image compares to the two reference
  481. images.
  482. It should even be possible to use this in the case where the two references
  483. are before the frame being encoded, i.e.
  484. P frames based on two parents.
  485. This might help for fades.
  486. \end_layout
  487. \begin_layout Section
  488. Theoretical Ramblings on SSIM
  489. \end_layout
  490. \begin_layout Standard
  491. According to Wikipedia, the SSIM metric is defined as
  492. \begin_inset Formula
  493. \[
  494. \mathrm{SSIM}\left(x,y\right)=\left(\frac{2\mu_{x}\mu_{y}+c_{1}}{\mu_{x}^{2}+\mu_{y}^{2}+c_{1}}\right)\cdot\left(\frac{2\sigma_{xy}+c_{2}}{\sigma_{x}^{2}+\sigma_{y}^{2}+c_{2}}\right)\,.
  495. \]
  496. \end_inset
  497. Where
  498. \begin_inset Formula $\mu_{x}$
  499. \end_inset
  500. and
  501. \begin_inset Formula $\mu_{y}$
  502. \end_inset
  503. are the DC of images
  504. \begin_inset Formula $x$
  505. \end_inset
  506. and
  507. \begin_inset Formula $y$
  508. \end_inset
  509. and
  510. \begin_inset Formula $\sigma_{x}$
  511. \end_inset
  512. and
  513. \begin_inset Formula $\sigma_{y}$
  514. \end_inset
  515. are the RMS values of the AC coefficients of images
  516. \begin_inset Formula $x$
  517. \end_inset
  518. and
  519. \begin_inset Formula $y$
  520. \end_inset
  521. .
  522. From now on, we will consider
  523. \begin_inset Formula $x$
  524. \end_inset
  525. to be the reference image and
  526. \begin_inset Formula $y$
  527. \end_inset
  528. to be the coded image.
  529. Now, let's ignore the DC for now and define a Simplified SSIM metric as
  530. \begin_inset Formula
  531. \[
  532. \mathrm{SSSIM}\left(x,y\right)=\frac{2\sigma_{xy}+c_{2}}{\sigma_{x}^{2}+\sigma_{y}^{2}+c_{2}}\,.
  533. \]
  534. \end_inset
  535. This is the metric we'll try optimizing here.
  536. First, let
  537. \begin_inset Formula $g=\sigma_{y}/\sigma_{x}$
  538. \end_inset
  539. be the gain that the codec causes on the AC coefficients and
  540. \begin_inset Formula $\hat{y}=y/g$
  541. \end_inset
  542. .
  543. Solving for
  544. \begin_inset Formula
  545. \[
  546. \frac{d}{dg}\mathrm{SSSIM}\left(x,y\right)=\frac{d}{dg}\frac{2g\sigma_{x\hat{y}}+c_{2}}{\sigma_{x}^{2}\left(1+g^{2}\right)+c_{2}}=0
  547. \]
  548. \end_inset
  549. we find that the optimal gain that maximizes SSSIM is
  550. \begin_inset Formula $g_{max}\approx1-\frac{c_{2}}{2\sigma_{x}^{2}}\cdot\left(\frac{\sigma_{x}^{2}}{\sigma_{x\hat{y}}}-1\right)$
  551. \end_inset
  552. .
  553. This means that conserving energy (
  554. \begin_inset Formula $g_{max}=1$
  555. \end_inset
  556. ) is a good thing to do as long as the contrast is high enough (
  557. \family roman
  558. \series medium
  559. \shape up
  560. \size normal
  561. \emph off
  562. \bar no
  563. \strikeout off
  564. \uuline off
  565. \uwave off
  566. \noun off
  567. \color none
  568. \begin_inset Formula $\frac{c_{2}}{2\sigma_{x}^{2}}$
  569. \end_inset
  570. is small) or the bit-rate is high enough (
  571. \begin_inset Formula $\frac{\sigma_{x}^{2}}{\sigma_{x\hat{y}}}$
  572. \end_inset
  573. close to 1).
  574. \end_layout
  575. \begin_layout Standard
  576. Now, let's consider a spherical horse in simple harmonic motion...
  577. or to be more exact, let's consider that the PVQ codebook is perfectly
  578. uniform over the sphere and that
  579. \begin_inset Formula $g_{max}=1$
  580. \end_inset
  581. .
  582. We get
  583. \begin_inset Formula
  584. \[
  585. \mathrm{SSSIM}\left(x,y\right)=\frac{\sigma_{xy}+c_{2}/2}{\sigma_{x}^{2}+c_{2}/2}\,,
  586. \]
  587. \end_inset
  588. where
  589. \begin_inset Formula $\sigma_{xy}/\sigma_{x}^{2}=\cos\theta$
  590. \end_inset
  591. is the cosine distance between
  592. \begin_inset Formula $x$
  593. \end_inset
  594. and
  595. \begin_inset Formula $y$
  596. \end_inset
  597. .
  598. Assuming a uniform quantizer, we have
  599. \begin_inset Formula
  600. \[
  601. \theta\propto2^{-b/(N-1)}\,,
  602. \]
  603. \end_inset
  604. where
  605. \begin_inset Formula $b$
  606. \end_inset
  607. is the number of bits allocated and
  608. \begin_inset Formula $N$
  609. \end_inset
  610. is the number of AC coefficients.
  611. Let
  612. \begin_inset Formula $c'=c_{2}/(2\sigma_{x}^{2})$
  613. \end_inset
  614. ...
  615. \end_layout
  616. \begin_layout Standard
  617. <FIXME: This needs to be cleaned up>
  618. \end_layout
  619. \begin_layout Standard
  620. \begin_inset Formula
  621. \[
  622. \mathrm{SSSIM}\left(x,y\right)=\frac{\cos\theta+c'}{1+c'}\approx\frac{1-\theta^{2}/2+c'}{1+c'}\,,
  623. \]
  624. \end_inset
  625. \end_layout
  626. \begin_layout Standard
  627. Trying to make SSIM equal for two blocks:
  628. \begin_inset Formula
  629. \[
  630. \frac{1+c_{1}'-2^{-2b_{1}/(N-1)}}{1+c_{1}'}=\frac{1+c_{2}'-2^{-2b_{2}/(N-1)}}{1+c_{2}'}
  631. \]
  632. \end_inset
  633. \end_layout
  634. \begin_layout Standard
  635. The optimal bit offset is
  636. \begin_inset Formula
  637. \[
  638. b=-\frac{N-1}{2}\log_{2}\left(1+\frac{c_{2}}{2\sigma_{x}^{2}}\right)
  639. \]
  640. \end_inset
  641. From this (theoretically) optimal offset, we can encode only the deviation
  642. from the optimal allocation.
  643. In practice,
  644. \begin_inset Formula $b$
  645. \end_inset
  646. would not be an exact bit allocation like for CELT, but only the
  647. \begin_inset Quotes eld
  648. \end_inset
  649. quantization step exponent
  650. \begin_inset Quotes erd
  651. \end_inset
  652. .
  653. \end_layout
  654. \begin_layout Section
  655. Conclusion
  656. \end_layout
  657. \begin_layout Standard
  658. While it seems like a good idea, we're still experimenting with the details.
  659. \end_layout
  660. \end_body
  661. \end_document