trans2d.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /*Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #include <omp.h>
  21. #include <stdlib.h>
  22. #include "od_defs.h"
  23. #include "od_filter.h"
  24. #include "stats_tools.h"
  25. #include "trans_tools.h"
  26. #include "int_search.h"
  27. #include "kiss99.h"
  28. #define USE_FILES (0)
  29. #define USE_AR95 (1)
  30. #define USE_SUBSET1 (0)
  31. #define USE_SUBSET3 (0)
  32. #define PRINT_COV (0)
  33. #define CG_SEARCH (0)
  34. #define USE_SIMPLEX (1)
  35. #define RAMP_DYADIC (0)
  36. #if CG_SEARCH
  37. # if USE_TYPE3 && RAMP_DYADIC
  38. # error "Dyadic ramp constraint not supported for Type-III transform."
  39. # endif
  40. # if USE_SIMPLEX && RAMP_DYADIC
  41. # error "Dyadic ramp constraint not supported with simplex search."
  42. # endif
  43. static void coding_gain_search(const double _r[2*B_SZ*2*B_SZ]){
  44. # if !USE_SIMPLEX
  45. # if B_SZ==4
  46. {
  47. int f[4];
  48. int p0;
  49. int q0;
  50. int s0;
  51. int s1;
  52. double cg;
  53. double best_cg;
  54. best_cg=0;
  55. # if RAMP_DYADIC
  56. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  57. int t0;
  58. f[3]=q0;
  59. /* S0 = 4/1*(1-q0/64)
  60. * S0 >= 1 -> 64-q0 >= 16
  61. */
  62. t0=(1<<FILTER_BITS)-q0;
  63. s0=1*t0-0;
  64. if(s0>=(1<<FILTER_BITS-2)){
  65. s0*=4;
  66. f[0]=s0;
  67. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  68. f[2]=p0;
  69. /* S1 = 4/3*(1-(1-q0/64)*p0/64)
  70. * S1 >= 1 -> 64^2-(64-q0)*p0 >= 64*48
  71. * S1 = x/64 -> 64^2-(64-q0)*p0 = 0 MOD 48
  72. */
  73. s1=(1<<2*FILTER_BITS)-t0*p0;
  74. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-2)&&s1%(3<<FILTER_BITS-2)==0){
  75. s1/=(3<<FILTER_BITS-2);
  76. f[1]=s1;
  77. cg=coding_gain_2d_collapsed(_r,f);
  78. if(cg>best_cg){
  79. best_cg=cg;
  80. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  81. }
  82. }
  83. }
  84. }
  85. }
  86. # else
  87. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  88. f[2]=p0;
  89. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  90. f[3]=q0;
  91. for(s0=(1<<FILTER_BITS);s0<=2*(1<<FILTER_BITS);s0++){
  92. f[0]=s0;
  93. for(s1=(1<<FILTER_BITS);s1<=2*(1<<FILTER_BITS);s1++){
  94. f[1]=s1;
  95. cg=coding_gain_2d_collapsed(_r,f);
  96. if(cg>best_cg){
  97. best_cg=cg;
  98. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  99. }
  100. }
  101. }
  102. }
  103. }
  104. # endif
  105. }
  106. # elif B_SZ==8
  107. {
  108. int f[10];
  109. int p0;
  110. int p1;
  111. int p2;
  112. int q0;
  113. int q1;
  114. int q2;
  115. int s0;
  116. int s1;
  117. int s2;
  118. int s3;
  119. double cg;
  120. double best_cg;
  121. best_cg=0;
  122. # if RAMP_DYADIC
  123. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  124. int t0;
  125. f[7]=q0;
  126. /* S0 = 8/1*(1-q0/64)
  127. * S0 >= 1 -> 64-q0 >= 8
  128. */
  129. t0=(1<<FILTER_BITS)-q0;
  130. s0=1*t0-0;
  131. if(s0>=(1<<FILTER_BITS-3)){
  132. s0*=8;
  133. f[0]=s0;
  134. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  135. f[4]=p0;
  136. for(q1=(1<<FILTER_BITS);q1>=-(1<<FILTER_BITS);q1--){
  137. int t1;
  138. f[8]=q1;
  139. /* S1 = 8/3*((1-q1/64)-(1-q0/64)*p0/64)
  140. * S1 >= 1 -> 64*t1-t0*p0 >= 64*24
  141. * S1 = x/64 -> 64*t1-t0*p0 = 0 MOD 24
  142. */
  143. t1=(1<<FILTER_BITS)-q1;
  144. s1=(1<<FILTER_BITS)*t1-t0*p0;
  145. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-3)&&
  146. s1%(3<<FILTER_BITS-3)==0){
  147. s1/=(3<<FILTER_BITS-3);
  148. f[1]=s1;
  149. for(p1=-(1<<FILTER_BITS);p1<=(1<<FILTER_BITS);p1++){
  150. f[5]=p1;
  151. for(q2=(1<<FILTER_BITS);q2>=-(1<<FILTER_BITS);q2--){
  152. int t2;
  153. f[9]=q2;
  154. /* S2 = 8/5*((1-q2/64)-(1-q1/64)*p1/64)
  155. * S2 >= 1 -> 64*t2-t1*p1) >= 64*40
  156. * S2 = x/64 -> 64*t2-t1*p1 = 0 MOD 40
  157. */
  158. t2=(1<<FILTER_BITS)-q2;
  159. s2=(1<<FILTER_BITS)*t2-t1*p1;
  160. if(s2>=(1<<FILTER_BITS)*(5<<FILTER_BITS-3)&&
  161. s2%(5<<FILTER_BITS-3)==0){
  162. s2/=(5<<FILTER_BITS-3);
  163. f[2]=s2;
  164. for(p2=-(1<<FILTER_BITS);p2<=(1<<FILTER_BITS);p2++){
  165. f[6]=p2;
  166. /* S3 = 8/7*(1-(1-q2/64)*p2/64)
  167. * S3 >= 1 -> 64^2-t2*p2 >= 64*56
  168. * S3 = x/64 -> 64^2-t2*p2 = 0 MOD 56
  169. */
  170. s3=(1<<2*FILTER_BITS)-t2*p2;
  171. if(s3>=(1<<FILTER_BITS)*(7<<FILTER_BITS-3)&&
  172. s3%(7<<FILTER_BITS-3)==0){
  173. s3/=(7<<FILTER_BITS-3);
  174. f[3]=s3;
  175. cg=coding_gain_2d_collapsed(_r,f);
  176. if(cg>best_cg){
  177. best_cg=cg;
  178. printf("%i %i %i %i %i %i %i %i %i %i %-24.18G\n",
  179. p0,p1,p2,q0,q1,q2,s0,s1,s2,s3,cg);
  180. }
  181. }
  182. }
  183. }
  184. }
  185. }
  186. }
  187. }
  188. }
  189. }
  190. }
  191. # else
  192. # error "Exhaustive search for B_SZ==8 only supported using RAMP_DYADIC (1)."
  193. # endif
  194. }
  195. # else
  196. # error "Exhaustive search not supported for this block size."
  197. # endif
  198. # else
  199. {
  200. int dims;
  201. int i;
  202. kiss99_ctx ks[NUM_PROCS];
  203. int lb[22];
  204. int ub[22];
  205. # if B_SZ==4
  206. dims=4;
  207. # elif B_SZ==8
  208. dims=10;
  209. # elif B_SZ==16
  210. dims=22;
  211. # else
  212. # error "Unsupported block size."
  213. # endif
  214. for(i=0;i<dims;i++){
  215. lb[i]=i<(B_SZ>>1)?(1<<FILTER_BITS):-(1<<FILTER_BITS);
  216. ub[i]=i<(B_SZ>>1)?2*(1<<FILTER_BITS):(1<<FILTER_BITS);
  217. }
  218. for(i=0;i<NUM_PROCS;i++){
  219. uint32_t srand;
  220. srand=i*16843009; /*Broadcast char to 4xchar*/
  221. kiss99_srand(&ks[i],(unsigned char *)&srand,sizeof(srand));
  222. }
  223. #pragma omp parallel for schedule(dynamic)
  224. for(i=0;i<128;i++){
  225. int tid;
  226. int j;
  227. # if B_SZ==4
  228. int f[4];
  229. # elif B_SZ==8
  230. int f[10];
  231. # elif B_SZ==16
  232. int f[22];
  233. # else
  234. # error "Unsupported block size."
  235. # endif
  236. double cg;
  237. tid=omp_get_thread_num();
  238. for(j=0;j<dims;j++){
  239. int range;
  240. int mask;
  241. int rng;
  242. range=ub[j]-lb[j];
  243. mask=(1<<OD_ILOG_NZ(range))-1;
  244. do {
  245. rng=((int)kiss99_rand(&ks[tid]))&mask;
  246. }
  247. while(rng>range);
  248. f[j]=lb[j]+rng;
  249. }
  250. j=int_simplex_max(&cg,dims,coding_gain_2d_collapsed,_r,lb,ub,f);
  251. fprintf(stdout,"obj=%-24.18G steps=%4d params={",cg,j);
  252. for(j=0;j<dims;j++){
  253. fprintf(stdout,"%3d%c",f[j],j==dims-1?'}':',');
  254. }
  255. fprintf(stdout,"\n");
  256. }
  257. }
  258. # endif
  259. }
  260. #endif
  261. #if USE_FILES
  262. static int t_start(void *_ctx,const char *_name,const th_info *_ti,int _pli,
  263. int _nxblocks,int _nyblocks){
  264. trans_ctx *ctx;
  265. fprintf(stdout,"%s %i %i\n",_name,_nxblocks,_nyblocks);
  266. fflush(stdout);
  267. ctx=(trans_ctx *)_ctx;
  268. image_ctx_init(&ctx->img,_name,_nxblocks,_nyblocks);
  269. return EXIT_SUCCESS;
  270. }
  271. static void t_load_data(void *_ctx,const unsigned char *_data,int _stride,
  272. int _bi,int _bj){
  273. trans_ctx *ctx;
  274. ctx=(trans_ctx *)_ctx;
  275. if(_bi==0&&_bj==0){
  276. int y;
  277. int x;
  278. int j;
  279. int i;
  280. unsigned char buf[2*B_SZ*2*B_SZ];
  281. for(y=0;y<ctx->img.nyblocks*B_SZ-(2*B_SZ-1);y++){
  282. for(x=0;x<ctx->img.nxblocks*B_SZ-(2*B_SZ-1);x++){
  283. for(j=0;j<2*B_SZ;j++){
  284. for(i=0;i<2*B_SZ;i++){
  285. buf[j*2*B_SZ+i]=_data[(y+j)*_stride+(x+i)];
  286. }
  287. }
  288. trans_data_add(&ctx->td,buf);
  289. }
  290. }
  291. }
  292. }
  293. #define PADDING (0)
  294. const block_func BLOCKS[]={
  295. t_load_data
  296. };
  297. const int NBLOCKS=sizeof(BLOCKS)/sizeof(*BLOCKS);
  298. #endif
  299. int main(int _argc,const char *_argv[]){
  300. trans_ctx ctx[NUM_PROCS];
  301. const int *f;
  302. int i;
  303. double r[2*B_SZ*2*B_SZ];
  304. const double *cov;
  305. #if B_SZ==4
  306. f=OD_FILTER_PARAMS4;
  307. #elif B_SZ==8
  308. f=OD_FILTER_PARAMS8;
  309. #elif B_SZ==16
  310. f=OD_FILTER_PARAMS16;
  311. #else
  312. # error "Need filter params for this block size."
  313. #endif
  314. for(i=0;i<NUM_PROCS;i++){
  315. trans_data_init(&ctx[i].td,2*B_SZ*2*B_SZ);
  316. }
  317. cov=r;
  318. #if USE_FILES
  319. omp_set_num_threads(NUM_PROCS);
  320. ne_apply_to_blocks(ctx,sizeof(*ctx),0x1,PADDING,t_start,NBLOCKS,BLOCKS,NULL,
  321. _argc,_argv);
  322. for(i=1;i<NUM_PROCS;i++){
  323. trans_data_combine(&ctx[0].td,&ctx[i].td);
  324. }
  325. trans_data_normalize(&ctx[0].td);
  326. # if PRINT_COV
  327. trans_data_print(&ctx[0].td,stderr);
  328. # endif
  329. fprintf(stdout,"original cg=%- 24.16G\n",coding_gain_2d(ctx[0].td.cov));
  330. trans_data_collapse(&ctx[0].td,2*B_SZ,r);
  331. fprintf(stdout,"collapse cg=%- 24.16G\n",coding_gain_2d_collapsed(r));
  332. trans_data_expand(&ctx[0].td,2*B_SZ,r);
  333. fprintf(stdout,"expanded cg=%- 24.16G\n",coding_gain_2d(ctx[0].td.cov));
  334. #elif USE_AR95
  335. auto_regressive_collapsed(r,2*B_SZ*2*B_SZ,2*B_SZ,0.95);
  336. #elif USE_SUBSET1
  337. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  338. cov=SUBSET1_2D[B_SZ_LOG-OD_LOG_BSIZE0];
  339. # else
  340. # error "Need auto-correlation matrix for subset1 for this block size."
  341. # endif
  342. #elif USE_SUBSET3
  343. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  344. cov=SUBSET3_2D[B_SZ_LOG-OD_LOG_BSIZE0];
  345. # else
  346. # error "Need auto-correlation matrix for subset3 for this block size."
  347. # endif
  348. #endif
  349. #if CG_SEARCH
  350. coding_gain_search(cov);
  351. #else
  352. fprintf(stdout,"cg=%-24.18G\n",coding_gain_2d_collapsed(cov,f));
  353. #endif
  354. for(i=0;i<NUM_PROCS;i++){
  355. trans_data_clear(&ctx[i].td);
  356. }
  357. return EXIT_SUCCESS;
  358. }