/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/threading.h"

#include "vp8/common/loopfilter.h"
#include "vp8/common/extend.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif

#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
#define CALLOC_ARRAY_ALIGNED(p, n, algn) do {                        \
    CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n)));  \
    memset((p), 0, (n) * sizeof(*(p)));                              \
  } while (0)

void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                       MB_ROW_DEC *mbrd, int count)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
        mbd->subpixel_predict = xd->subpixel_predict;
        mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

        mbd->frame_type = pc->frame_type;
        mbd->pre = xd->pre;
        mbd->dst = xd->dst;

        mbd->segmentation_enabled = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        memcpy(mbd->segment_feature_data, xd->segment_feature_data,
               sizeof(xd->segment_feature_data));

        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
        memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
        memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        /*unsigned char mode_ref_lf_delta_enabled;
        unsigned char mode_ref_lf_delta_update;*/
        mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->mbc[0];

        memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
        memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
        memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
        memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

        mbd->fullpixel_mask = 0xffffffff;

        if (pc->full_pixel)
            mbd->fullpixel_mask = 0xfffffff8;
    }

    for (i = 0; i < pc->mb_rows; i++)
        pbi->mt_current_mb_col[i] = -1;
}
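
/* Decode one macroblock: read its residual tokens (unless the skip flag is
 * set), build the intra or inter prediction, then add the dequantized,
 * inverse-transformed residual to reconstruct the block in the destination
 * frame buffer.
 */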
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx)
{
    MB_PREDICTION_MODE mode;
    int i;
#if CONFIG_ERROR_CONCEALMENT
    int corruption_detected = 0;
#else
    (void)mb_idx;
#endif

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else if (!vp8dx_bool_error(xd->current_bc))
    {
        int eobtotal;
        eobtotal = vp8_decode_mb_tokens(pbi, xd);

        /* Special case:  Force the loopfilter to skip when eobtotal is zero */
        xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
    }

    mode = xd->mode_info_context->mbmi.mode;

    if (xd->segmentation_enabled)
        vp8_mb_init_dequantizer(pbi, xd);

#if CONFIG_ERROR_CONCEALMENT

    if (pbi->ec_active)
    {
        int throw_residual;
        /* When we have independent partitions we can apply residual even
         * though other partitions within the frame are corrupt.
         */
        throw_residual = (!pbi->independent_partitions &&
                          pbi->frame_corrupt_residual);
        throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));

        if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual))
        {
            /* MB with corrupt residuals or corrupt mode/motion vectors.
             * Better to use the predictor as reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            memset(xd->qcoeff, 0, sizeof(xd->qcoeff));

            corruption_detected = 1;

            /* force idct to be skipped for B_PRED and use the
             * prediction only for reconstruction
             * */
            memset(xd->eobs, 0, 25);
        }
    }
#endif
    /* do prediction */
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_build_intra_predictors_mbuv_s(xd,
                                          xd->recon_above[1],
                                          xd->recon_above[2],
                                          xd->recon_left[1],
                                          xd->recon_left[2],
                                          xd->recon_left_stride[1],
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.uv_stride);

        if (mode != B_PRED)
        {
            vp8_build_intra_predictors_mby_s(xd,
                                             xd->recon_above[0],
                                             xd->recon_left[0],
                                             xd->recon_left_stride[0],
                                             xd->dst.y_buffer,
                                             xd->dst.y_stride);
        }
        else
        {
            short *DQC = xd->dequant_y1;
            int dst_stride = xd->dst.y_stride;

            /* clear out residual eob info */
            if (xd->mode_info_context->mbmi.mb_skip_coeff)
                memset(xd->eobs, 0, 25);

            intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

            for (i = 0; i < 16; i++)
            {
                BLOCKD *b = &xd->block[i];
                unsigned char *dst = xd->dst.y_buffer + b->offset;
                B_PREDICTION_MODE b_mode =
                    xd->mode_info_context->bmi[i].as_mode;
                unsigned char *Above;
                unsigned char *yleft;
                int left_stride;
                unsigned char top_left;

                /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
                if (i < 4 && pbi->common.filter_level)
                    Above = xd->recon_above[0] + b->offset;
                else
                    Above = dst - dst_stride;

                if (i % 4 == 0 && pbi->common.filter_level)
                {
                    yleft = xd->recon_left[0] + i;
                    left_stride = 1;
                }
                else
                {
                    yleft = dst - 1;
                    left_stride = dst_stride;
                }

                if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level)
                    top_left = *(xd->recon_left[0] + i - 1);
                else
                    top_left = Above[-1];

                vp8_intra4x4_predict(Above, yleft, left_stride,
                                     b_mode, dst, dst_stride, top_left);

                if (xd->eobs[i])
                {
                    if (xd->eobs[i] > 1)
                    {
                        vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
                    }
                    else
                    {
                        vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
                                             dst, dst_stride, dst, dst_stride);
                        memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                    }
                }
            }
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

#if CONFIG_ERROR_CONCEALMENT
    if (corruption_detected)
    {
        return;
    }
#endif

    if (!xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        /* dequantization and idct */
        if (mode != B_PRED)
        {
            short *DQC = xd->dequant_y1;

            if (mode != SPLITMV)
            {
                BLOCKD *b = &xd->block[24];

                /* do 2nd order transform on the dc block */
                if (xd->eobs[24] > 1)
                {
                    vp8_dequantize_b(b, xd->dequant_y2);

                    vp8_short_inv_walsh4x4(&b->dqcoeff[0],
                                           xd->qcoeff);
                    memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
                }
                else
                {
                    b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
                    vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
                                             xd->qcoeff);
                    memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }

                /* override the dc dequant constant in order to preserve the
                 * dc components
                 */
                DQC = xd->dequant_y1_dc;
            }

            vp8_dequant_idct_add_y_block
                            (xd->qcoeff, DQC,
                             xd->dst.y_buffer,
                             xd->dst.y_stride, xd->eobs);
        }

        vp8_dequant_idct_add_uv_block
                        (xd->qcoeff + 16 * 16, xd->dequant_uv,
                         xd->dst.u_buffer, xd->dst.v_buffer,
                         xd->dst.uv_stride, xd->eobs + 16);
    }
}
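
/* Decode the macroblock rows assigned to one thread.  Each worker handles
 * every (decoding_thread_count + 1)-th row starting at start_mb_row, and
 * synchronizes with the row above through pbi->mt_current_mb_col so a
 * macroblock is decoded only after its above neighbours are available.
 */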
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
{
    const int *last_row_current_mb_col;
    int *current_mb_col;
    int mb_row;
    VP8_COMMON *pc = &pbi->common;
    const int nsync = pbi->sync_range;
    const int first_row_no_sync_above = pc->mb_cols + nsync;
    int num_part = 1 << pbi->common.multi_token_partition;
    int last_mb_row = start_mb_row;

    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
    YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];

    int recon_y_stride = yv12_fb_new->y_stride;
    int recon_uv_stride = yv12_fb_new->uv_stride;

    unsigned char *ref_buffer[MAX_REF_FRAMES][3];
    unsigned char *dst_buffer[3];
    int i;
    int ref_fb_corrupted[MAX_REF_FRAMES];

    ref_fb_corrupted[INTRA_FRAME] = 0;

    for (i = 1; i < MAX_REF_FRAMES; i++)
    {
        YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];

        ref_buffer[i][0] = this_fb->y_buffer;
        ref_buffer[i][1] = this_fb->u_buffer;
        ref_buffer[i][2] = this_fb->v_buffer;

        ref_fb_corrupted[i] = this_fb->corrupted;
    }

    dst_buffer[0] = yv12_fb_new->y_buffer;
    dst_buffer[1] = yv12_fb_new->u_buffer;
    dst_buffer[2] = yv12_fb_new->v_buffer;

    xd->up_available = (start_mb_row != 0);

    xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
    xd->mode_info_stride = pc->mode_info_stride;
    for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int recon_yoffset, recon_uvoffset;
        int mb_col;
        int filter_level;
        loop_filter_info_n *lfi_n = &pc->lf_info;

        /* save last row processed by this thread */
        last_mb_row = mb_row;
        /* select bool coder for current partition */
        xd->current_bc = &pbi->mbc[mb_row % num_part];

        if (mb_row > 0)
            last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
        else
            last_row_current_mb_col = &first_row_no_sync_above;

        current_mb_col = &pbi->mt_current_mb_col[mb_row];

        recon_yoffset = mb_row * recon_y_stride * 16;
        recon_uvoffset = mb_row * recon_uv_stride * 8;

        /* reset contexts */
        xd->above_context = pc->above_context;
        memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

        xd->left_available = 0;

        xd->mb_to_top_edge = -((mb_row * 16)) << 3;
        xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

        if (pbi->common.filter_level)
        {
            xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
            xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
            xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;

            xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
            xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
            xd->recon_left[2] = pbi->mt_vleft_col[mb_row];

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = 1;
            xd->recon_left_stride[1] = 1;
        }
        else
        {
            xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
            xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
            xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;

            xd->recon_left[0] = xd->recon_above[0] - 1;
            xd->recon_left[1] = xd->recon_above[1] - 1;
            xd->recon_left[2] = xd->recon_above[2] - 1;

            xd->recon_above[0] -= xd->dst.y_stride;
            xd->recon_above[1] -= xd->dst.uv_stride;
            xd->recon_above[2] -= xd->dst.uv_stride;

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = xd->dst.y_stride;
            xd->recon_left_stride[1] = xd->dst.uv_stride;

            setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
                                   xd->recon_left[2], xd->dst.y_stride,
                                   xd->dst.uv_stride);
        }
        for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
        {
            if (((mb_col - 1) % nsync) == 0)
            {
                pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
                /* publish this row's progress for the thread decoding the
                 * row below */
                protected_write(mutex, current_mb_col, mb_col - 1);
            }

            if (mb_row && !(mb_col & (nsync - 1)))
            {
                pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];
                /* wait until the row above has progressed far enough ahead
                 * of this column */
                sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
            }

            /* Distance of MB to the various image edges.
             * These are specified to 8th pel as they are always
             * compared to values that are in 1/8th pel units.
             */
            xd->mb_to_left_edge = -((mb_col * 16) << 3);
            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

#if CONFIG_ERROR_CONCEALMENT
            {
                int corrupt_residual =
                            (!pbi->independent_partitions &&
                            pbi->frame_corrupt_residual) ||
                            vp8dx_bool_error(xd->current_bc);
                if (pbi->ec_active &&
                    (xd->mode_info_context->mbmi.ref_frame ==
                                                             INTRA_FRAME) &&
                    corrupt_residual)
                {
                    /* We have an intra block with corrupt
                     * coefficients, better to conceal with an inter
                     * block.
                     * Interpolate MVs from neighboring MBs
                     *
                     * Note that for the first mb with corrupt
                     * residual in a frame, we might not discover
                     * that before decoding the residual. That
                     * happens after this check, and therefore no
                     * inter concealment will be done.
                     */
                    vp8_interpolate_motion(xd,
                                           mb_row, mb_col,
                                           pc->mb_rows, pc->mb_cols);
                }
            }
#endif
            xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
            xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
            xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;

            xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset;
            xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset;
            xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset;

            /* propagate errors from reference frames */
            xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];

            mt_decode_macroblock(pbi, xd, 0);

            xd->left_available = 1;

            /* check if the boolean decoder has suffered an error */
            xd->corrupted |= vp8dx_bool_error(xd->current_bc);

            xd->recon_above[0] += 16;
            xd->recon_above[1] += 8;
            xd->recon_above[2] += 8;

            if (!pbi->common.filter_level)
            {
                xd->recon_left[0] += 16;
                xd->recon_left[1] += 8;
                xd->recon_left[2] += 8;
            }

            if (pbi->common.filter_level)
            {
                int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                               xd->mode_info_context->mbmi.mode != SPLITMV &&
                               xd->mode_info_context->mbmi.mb_skip_coeff);

                const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
                const int seg = xd->mode_info_context->mbmi.segment_id;
                const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

                filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

                if (mb_row != pc->mb_rows - 1)
                {
                    /* Save decoded MB last row data for next-row decoding */
                    memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                    memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                    memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                }

                /* save left_col for next MB decoding */
                if (mb_col != pc->mb_cols - 1)
                {
                    MODE_INFO *next = xd->mode_info_context + 1;

                    if (next->mbmi.ref_frame == INTRA_FRAME)
                    {
                        for (i = 0; i < 16; i++)
                            pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                        for (i = 0; i < 8; i++)
                        {
                            pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                            pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                        }
                    }
                }
                /* loopfilter on this macroblock. */
                if (filter_level)
                {
                    if (pc->filter_type == NORMAL_LOOPFILTER)
                    {
                        loop_filter_info lfi;
                        FRAME_TYPE frame_type = pc->frame_type;
                        const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
                        lfi.mblim = lfi_n->mblim[filter_level];
                        lfi.blim = lfi_n->blim[filter_level];
                        lfi.lim = lfi_n->lim[filter_level];
                        lfi.hev_thr = lfi_n->hev_thr[hev_index];

                        if (mb_col > 0)
                            vp8_loop_filter_mbv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_mbh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
                    }
                    else
                    {
                        if (mb_col > 0)
                            vp8_loop_filter_simple_mbv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_simple_mbh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
                    }
                }
            }

            recon_yoffset += 16;
            recon_uvoffset += 8;

            ++xd->mode_info_context;  /* next mb */

            xd->above_context++;
        }
        /* adjust to the next row of mbs */
        if (pbi->common.filter_level)
        {
            if (mb_row != pc->mb_rows - 1)
            {
                int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
                int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                for (i = 0; i < 4; i++)
                {
                    pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                    pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                    pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                }
            }
        }
        else
            vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
                              xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

        /* last MB of row is ready just after extension is done */
        protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);

        ++xd->mode_info_context;      /* skip prediction column */
        xd->up_available = 1;

        /* since we have multithread */
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    /* signal end of frame decoding if this thread processed the last mb_row */
    if (last_mb_row == (pc->mb_rows - 1))
        sem_post(&pbi->h_event_end_decoding);
}
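
/* Worker thread entry point.  Each thread blocks on its start semaphore,
 * decodes its share of macroblock rows when signalled, and exits once
 * b_multithreaded_rd is cleared by vp8_decoder_remove_threads().
 */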
static THREAD_FUNCTION thread_decoding_proc(void *p_data)
{
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0)
            break;

        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0)
                break;
            else
            {
                MACROBLOCKD *xd = &mbrd->mbd;
                xd->left_context = &mb_row_left_context;

                mt_decode_mb_rows(pbi, xd, ithread + 1);
            }
        }
    }

    return 0;
}
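
/* Create the decoding worker threads.  The thread count is capped at the
 * maximum number of token partitions (8) and at the number of available
 * processor cores; the calling thread decodes as well, so core_count - 1
 * workers are spawned.
 */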
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    unsigned int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;
    pthread_mutex_init(&pbi->mt_mutex, NULL);

    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

    /* limit decoding threads to the available cores */
    if (core_count > pbi->common.processor_core_count)
        core_count = pbi->common.processor_core_count;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
        CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
        CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
        CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
    int i;

    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
    {
        /* De-allocate mutex */
        if (pbi->pmutex != NULL)
        {
            for (i = 0; i < mb_rows; i++)
            {
                pthread_mutex_destroy(&pbi->pmutex[i]);
            }
            vpx_free(pbi->pmutex);
            pbi->pmutex = NULL;
        }

        vpx_free(pbi->mt_current_mb_col);
        pbi->mt_current_mb_col = NULL;

        /* Free above_row buffers. */
        if (pbi->mt_yabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yabove_row[i]);
                pbi->mt_yabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_yabove_row);
            pbi->mt_yabove_row = NULL;
        }

        if (pbi->mt_uabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uabove_row[i]);
                pbi->mt_uabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_uabove_row);
            pbi->mt_uabove_row = NULL;
        }

        if (pbi->mt_vabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vabove_row[i]);
                pbi->mt_vabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_vabove_row);
            pbi->mt_vabove_row = NULL;
        }

        /* Free left_col buffers. */
        if (pbi->mt_yleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yleft_col[i]);
                pbi->mt_yleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_yleft_col);
            pbi->mt_yleft_col = NULL;
        }

        if (pbi->mt_uleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uleft_col[i]);
                pbi->mt_uleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_uleft_col);
            pbi->mt_uleft_col = NULL;
        }

        if (pbi->mt_vleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vleft_col[i]);
                pbi->mt_vleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_vleft_col);
            pbi->mt_vleft_col = NULL;
        }
    }
}
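
/* Allocate the per-row synchronization state and the above-row / left-column
 * scratch buffers used when the loop filter runs alongside decoding.
 * sync_range controls how far ahead the row above must be before a thread
 * decodes a macroblock, and is scaled with the frame width.
 */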
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;
    int uv_width;

    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

        uv_width = width >> 1;

        /* Allocate mutex */
        CHECK_MEM_ERROR(pbi->pmutex, vpx_malloc(sizeof(*pbi->pmutex) *
                                                pc->mb_rows));
        if (pbi->pmutex)
        {
            for (i = 0; i < pc->mb_rows; i++)
            {
                pthread_mutex_init(&pbi->pmutex[i], NULL);
            }
        }

        /* Allocate an int for each mb row. */
        CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

        /* Allocate memory for above_row buffers. */
        CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));

        CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        /* Allocate memory for left_col buffers. */
        CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
    /* shutdown MB Decoding thread; */
    if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd))
    {
        int i;
        protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);

        /* allow all threads to exit */
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_start_decoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

        sem_destroy(&pbi->h_event_end_decoding);

        vpx_free(pbi->h_decoding_thread);
        pbi->h_decoding_thread = NULL;

        vpx_free(pbi->h_event_start_decoding);
        pbi->h_event_start_decoding = NULL;

        vpx_free(pbi->mb_row_di);
        pbi->mb_row_di = NULL;

        vpx_free(pbi->de_thread_data);
        pbi->de_thread_data = NULL;
    }
    pthread_mutex_destroy(&pbi->mt_mutex);
}
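
/* Multithreaded frame decode: prime the above-row / left-column buffers (or
 * the intra-recon top line when the loop filter is off), wake the worker
 * threads, decode row 0 and every following (decoding_thread_count + 1)-th
 * row on the calling thread, then wait for the end-of-decoding semaphore.
 */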
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
    VP8_COMMON *pc = &pbi->common;
    unsigned int i;
    int j;

    int filter_level = pc->filter_level;
    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];

    if (filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, yv12_fb_new->y_width + 5);
        memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (yv12_fb_new->y_width >> 1) + 5);
        memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (yv12_fb_new->y_width >> 1) + 5);

        for (j = 1; j < pc->mb_rows; j++)
        {
            memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129, 1);
            memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
            memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (j = 0; j < pc->mb_rows; j++)
        {
            memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
            memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
            memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
        }

        /* Initialize the loop filter for this frame. */
        vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
    }
    else
        vp8_setup_intra_recon_top_line(yv12_fb_new);

    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

    mt_decode_mb_rows(pbi, xd, 0);

    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
}