Multiply32.cc 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. #include "Multiply32.hh"
  2. #include "PixelOperations.hh"
  3. #include <cstring>
  4. namespace openmsx {
  5. // class Multiply32<uint32_t>
  6. Multiply32<uint32_t>::Multiply32(const PixelOperations<uint32_t>& /*pixelOps*/)
  7. {
  8. // nothing
  9. }
  10. // class Multiply32<uint16_t>
  11. // gcc can optimize these rotate functions to just one instruction.
  12. // We don't really need a rotate, but we do need a shift over a positive or
  13. // negative (not known at compile time) distance, rotate handles this just fine.
  14. // Note that 0 <= n < 32; on x86 this doesn't matter but on PPC it does.
  15. static inline uint32_t rotLeft(uint32_t a, unsigned n)
  16. {
  17. return (a << n) | (a >> (32 - n));
  18. }
  19. Multiply32<uint16_t>::Multiply32(const PixelOperations<uint16_t>& pixelOps)
  20. {
  21. Rmask1 = pixelOps.getRmask();
  22. Gmask1 = pixelOps.getGmask();
  23. Bmask1 = pixelOps.getBmask();
  24. Rshift1 = ((2 + pixelOps.getRloss()) - pixelOps.getRshift()) & 31;
  25. Gshift1 = ((2 + pixelOps.getGloss()) - pixelOps.getGshift()) & 31;
  26. Bshift1 = ((2 + pixelOps.getBloss()) - pixelOps.getBshift()) & 31;
  27. Rmask2 = ((1 << (2 + pixelOps.getRloss())) - 1) <<
  28. (10 + pixelOps.getRshift() - 2 * (2 + pixelOps.getRloss()));
  29. Gmask2 = ((1 << (2 + pixelOps.getGloss())) - 1) <<
  30. (10 + pixelOps.getGshift() - 2 * (2 + pixelOps.getGloss()));
  31. Bmask2 = ((1 << (2 + pixelOps.getBloss())) - 1) <<
  32. (10 + pixelOps.getBshift() - 2 * (2 + pixelOps.getBloss()));
  33. Rshift2 = (2 * (2 + pixelOps.getRloss()) - pixelOps.getRshift() - 10) & 31;
  34. Gshift2 = (2 * (2 + pixelOps.getGloss()) - pixelOps.getGshift() - 10) & 31;
  35. Bshift2 = (2 * (2 + pixelOps.getBloss()) - pixelOps.getBshift() - 10) & 31;
  36. Rshift3 = (Rshift1 + 0) & 31;
  37. Gshift3 = (Gshift1 + 10) & 31;
  38. Bshift3 = (Bshift1 + 20) & 31;
  39. factor = 0;
  40. memset(tab, 0, sizeof(tab));
  41. }
  42. void Multiply32<uint16_t>::setFactor32(unsigned f)
  43. {
  44. if (factor == f) {
  45. return;
  46. }
  47. factor = f;
  48. for (unsigned p = 0; p < 0x10000; ++p) {
  49. uint32_t r = rotLeft((p & Rmask1), Rshift1) |
  50. rotLeft((p & Rmask2), Rshift2);
  51. uint32_t g = rotLeft((p & Gmask1), Gshift1) |
  52. rotLeft((p & Gmask2), Gshift2);
  53. uint32_t b = rotLeft((p & Bmask1), Bshift1) |
  54. rotLeft((p & Bmask2), Bshift2);
  55. tab[p] = (((r * factor) >> 8) << 0) |
  56. (((g * factor) >> 8) << 10) |
  57. (((b * factor) >> 8) << 20);
  58. }
  59. }
  60. } // namespace openmsx