Deflicker.cc 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. #include "Deflicker.hh"
  2. #include "RawFrame.hh"
  3. #include "PixelOperations.hh"
  4. #include "one_of.hh"
  5. #include "unreachable.hh"
  6. #include "vla.hh"
  7. #include "build-info.hh"
  8. #include <memory>
  9. #ifdef __SSE2__
  10. #include <emmintrin.h>
  11. #endif
  12. namespace openmsx {
  13. template<typename Pixel> class DeflickerImpl final : public Deflicker
  14. {
  15. public:
  16. DeflickerImpl(const PixelFormat& format,
  17. std::unique_ptr<RawFrame>* lastFrames);
  18. private:
  19. const void* getLineInfo(
  20. unsigned line, unsigned& width,
  21. void* buf, unsigned bufWidth) const override;
  22. PixelOperations<Pixel> pixelOps;
  23. };
  24. std::unique_ptr<Deflicker> Deflicker::create(
  25. const PixelFormat& format,
  26. std::unique_ptr<RawFrame>* lastFrames)
  27. {
  28. #if HAVE_16BPP
  29. if (format.getBytesPerPixel() == 2) {
  30. return std::make_unique<DeflickerImpl<uint16_t>>(format, lastFrames);
  31. }
  32. #endif
  33. #if HAVE_32BPP
  34. if (format.getBytesPerPixel() == 4) {
  35. return std::make_unique<DeflickerImpl<uint32_t>>(format, lastFrames);
  36. }
  37. #endif
  38. UNREACHABLE; return nullptr; // avoid warning
  39. }
  40. Deflicker::Deflicker(const PixelFormat& format,
  41. std::unique_ptr<RawFrame>* lastFrames_)
  42. : FrameSource(format)
  43. , lastFrames(lastFrames_)
  44. {
  45. }
  46. void Deflicker::init()
  47. {
  48. FrameSource::init(FIELD_NONINTERLACED);
  49. setHeight(lastFrames[0]->getHeight());
  50. }
  51. unsigned Deflicker::getLineWidth(unsigned line) const
  52. {
  53. return lastFrames[0]->getLineWidthDirect(line);
  54. }
  55. template<typename Pixel>
  56. DeflickerImpl<Pixel>::DeflickerImpl(const PixelFormat& format,
  57. std::unique_ptr<RawFrame>* lastFrames_)
  58. : Deflicker(format, lastFrames_)
  59. , pixelOps(format)
  60. {
  61. }
  62. #ifdef __SSE2__
  /**
   * Blends (averages) the pixels in two SSE registers, pixel by pixel.
   *
   * 32bpp: every byte is averaged independently with _mm_avg_epu8 (which
   *        rounds up); 'blendMask' is not used.
   * 16bpp: computes (x & y) + (((x ^ y) & blendMask) >> 1) per 16-bit
   *        pixel. Bits that are cleared in 'blendMask' are dropped from
   *        the difference before the shift.
   *        NOTE(review): presumably the mask clears the lowest bit of each
   *        color component so the shift cannot leak into the neighbouring
   *        component -- confirm against PixelOperations::getBlendMask().
   *
   * The pixel size is a compile-time property, so the variant is selected
   * with 'if constexpr'. That way the 16bpp code (and its narrowing of
   * 'blendMask' to 16 bits) is never instantiated for 32-bit pixels.
   *
   * @param x          First register of packed pixels.
   * @param y          Second register of packed pixels.
   * @param blendMask  Mask used by the 16bpp variant (see above).
   * @return Register with the blended pixels.
   */
  template<typename Pixel>
  static __m128i blend(__m128i x, __m128i y, [[maybe_unused]] Pixel blendMask)
  {
      static_assert(sizeof(Pixel) == 2 || sizeof(Pixel) == 4,
                    "only 16bpp and 32bpp pixels are supported");
      if constexpr (sizeof(Pixel) == 4) {
          // 32bpp
          return _mm_avg_epu8(x, y);
      } else {
          // 16bpp, (x & y) + (((x ^ y) & blendMask) >> 1)
          const __m128i mask   = _mm_set1_epi16(static_cast<short>(blendMask));
          const __m128i common = _mm_and_si128(x, y);
          const __m128i differ = _mm_and_si128(_mm_xor_si128(x, y), mask);
          return _mm_add_epi16(common, _mm_srli_epi16(differ, 1));
      }
  }
  /**
   * Unaligned 128-bit load from the address that lies 'byteOffst' bytes
   * after 'ptr'. The offset is expressed in bytes (not in pixels) and may
   * be negative.
   */
  template<typename Pixel>
  static __m128i uload(const Pixel* ptr, ptrdiff_t byteOffst)
  {
      const auto* bytes = reinterpret_cast<const char*>(ptr) + byteOffst;
      return _mm_loadu_si128(reinterpret_cast<const __m128i*>(bytes));
  }
  /**
   * Unaligned 128-bit store of 'val' to the address that lies 'byteOffst'
   * bytes after 'ptr'. The offset is expressed in bytes (not in pixels)
   * and may be negative.
   */
  template<typename Pixel>
  static void ustore(Pixel* ptr, ptrdiff_t byteOffst, __m128i val)
  {
      auto* bytes = reinterpret_cast<char*>(ptr) + byteOffst;
      _mm_storeu_si128(reinterpret_cast<__m128i*>(bytes), val);
  }
  /**
   * Pixel-wise equality test of two SSE registers. Every pixel-sized lane
   * of the result has all bits set when the corresponding pixels in 'x'
   * and 'y' are equal and all bits cleared otherwise. The lane width is
   * 32 bits for 4-byte pixels and 16 bits for 2-byte pixels.
   */
  template<typename Pixel>
  static __m128i compare(__m128i x, __m128i y)
  {
      static_assert((sizeof(Pixel) == 2) || (sizeof(Pixel) == 4));
      return (sizeof(Pixel) == 4) ? _mm_cmpeq_epi32(x, y)
                                  : _mm_cmpeq_epi16(x, y);
  }
  103. #endif
  104. template<typename Pixel>
  105. const void* DeflickerImpl<Pixel>::getLineInfo(
  106. unsigned line, unsigned& width, void* buf_, unsigned bufWidth) const
  107. {
  108. unsigned width0 = lastFrames[0]->getLineWidthDirect(line);
  109. unsigned width1 = lastFrames[1]->getLineWidthDirect(line);
  110. unsigned width2 = lastFrames[2]->getLineWidthDirect(line);
  111. unsigned width3 = lastFrames[3]->getLineWidthDirect(line);
  112. const Pixel* line0 = lastFrames[0]->template getLinePtrDirect<Pixel>(line);
  113. const Pixel* line1 = lastFrames[1]->template getLinePtrDirect<Pixel>(line);
  114. const Pixel* line2 = lastFrames[2]->template getLinePtrDirect<Pixel>(line);
  115. const Pixel* line3 = lastFrames[3]->template getLinePtrDirect<Pixel>(line);
  116. if ((width0 != width3) || (width0 != width2) || (width0 != width1)) {
  117. // Not all the same width.
  118. width = width0;
  119. return line0;
  120. }
  121. // Prefer to write directly to the output buffer, if that's not
  122. // possible store the intermediate result in a temp buffer.
  123. VLA_SSE_ALIGNED(Pixel, buf2, width0);
  124. auto* buf = static_cast<Pixel*>(buf_);
  125. Pixel* out = (width0 <= bufWidth) ? buf : buf2;
  126. // Detect pixels that alternate between two different color values and
  127. // replace those with the average color. We search for an alternating
  128. // sequence with length (at least) 4. Or IOW we look for "A B A B".
  129. // The implementation below also detects a constant pixel value
  130. // "A A A A" as alternating between "A" and "A", but that's fine.
  131. Pixel* dst = out;
  132. unsigned remaining = width0;
  133. #ifdef __SSE2__
  134. size_t pixelsPerSSE = sizeof(__m128i) / sizeof(Pixel);
  135. size_t widthSSE = remaining & ~(pixelsPerSSE - 1); // rounded down to a multiple of pixels in a SSE register
  136. line0 += widthSSE;
  137. line1 += widthSSE;
  138. line2 += widthSSE;
  139. line3 += widthSSE;
  140. dst += widthSSE;
  141. auto byteOffst = -ptrdiff_t(widthSSE * sizeof(Pixel));
  142. Pixel blendMask = pixelOps.getBlendMask();
  143. while (byteOffst < 0) {
  144. __m128i a0 = uload(line0, byteOffst);
  145. __m128i a1 = uload(line1, byteOffst);
  146. __m128i a2 = uload(line2, byteOffst);
  147. __m128i a3 = uload(line3, byteOffst);
  148. __m128i e02 = compare<Pixel>(a0, a2); // a0 == a2
  149. __m128i e13 = compare<Pixel>(a1, a3); // a1 == a3
  150. __m128i cnd = _mm_and_si128(e02, e13); // (a0==a2) && (a1==a3)
  151. __m128i a01 = blend(a0, a1, blendMask);
  152. __m128i p = _mm_xor_si128(a0, a01);
  153. __m128i q = _mm_and_si128(p, cnd);
  154. __m128i r = _mm_xor_si128(q, a0); // select(a0, a01, cnd)
  155. ustore(dst, byteOffst, r);
  156. byteOffst += sizeof(__m128i);
  157. }
  158. remaining &= pixelsPerSSE - 1;
  159. #endif
  160. for (unsigned x = 0; x < remaining; ++x) {
  161. dst[x] = ((line0[x] == line2[x]) && (line1[x] == line3[x]))
  162. ? pixelOps.template blend<1, 1>(line0[x], line1[x])
  163. : line0[x];
  164. }
  165. if (width0 <= bufWidth) {
  166. // It it already fits, we're done
  167. width = width0;
  168. } else {
  169. // Otherwise scale so that it does fit.
  170. width = bufWidth;
  171. scaleLine(out, buf, width0, bufWidth);
  172. }
  173. return buf;
  174. }
  175. } // namespace openmsx