ZMBVEncoder.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. // Code based on DOSBox-0.65
  2. #include "ZMBVEncoder.hh"
  3. #include "FrameSource.hh"
  4. #include "PixelOperations.hh"
  5. #include "endian.hh"
  6. #include "ranges.hh"
  7. #include "unreachable.hh"
  8. #include <cassert>
  9. #include <cstdlib>
  10. #include <cstring>
  11. #include <cmath>
  12. namespace openmsx {
  // Version numbers stored in the header that precedes every key frame.
  constexpr uint8_t DBZV_VERSION_HIGH = 0;
  constexpr uint8_t DBZV_VERSION_LOW = 1;

  // Value stored in the 'compression' field of the key frame header.
  constexpr uint8_t COMPRESSION_ZLIB = 1;

  // Largest horizontal/vertical component of a motion vector. The frame
  // buffers get a border of this many pixels on every side.
  constexpr unsigned MAX_VECTOR = 16;

  // Size (in pixels) of the blocks used for motion search.
  constexpr unsigned BLOCK_WIDTH = MAX_VECTOR;
  constexpr unsigned BLOCK_HEIGHT = MAX_VECTOR;

  // Bit set in the first output byte of a frame when it is a key frame.
  constexpr unsigned FLAG_KEYFRAME = 0x01;
  // A candidate motion vector (in pixels) together with a heuristic cost
  // that is used to order the search table.
  struct CodecVector {
      // Euclidean length of the vector, scaled by a direction penalty:
      //   x1 for purely horizontal/vertical offsets (including 0,0),
      //   x2 for pure diagonals,
      //   x4 for every other ('random') direction.
      float cost() const {
          const float length = std::sqrt(float(x * x + y * y));
          float penalty = 4.0f;
          if ((x == 0) || (y == 0)) {
              penalty = 1.0f;
          } else if (std::abs(x) == std::abs(y)) {
              penalty = 2.0f;
          }
          return length * penalty;
      }

      int x;
      int y;
  };
  38. static inline bool operator<(const CodecVector& l, const CodecVector& r)
  39. {
  40. return l.cost() < r.cost();
  41. }
  42. constexpr unsigned VECTOR_TAB_SIZE =
  43. 1 + // center
  44. 8 * MAX_VECTOR + // horizontal, vertial, diagonal
  45. MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
  46. CodecVector vectorTable[VECTOR_TAB_SIZE];
  // Header written right after the flags byte of every key frame.
  // compressFrame() overlays this struct directly on the output buffer, so
  // its in-memory layout is the on-disk layout.
  struct KeyframeHeader {
      uint8_t high_version; // DBZV_VERSION_HIGH
      uint8_t low_version;  // DBZV_VERSION_LOW
      uint8_t compression;  // COMPRESSION_ZLIB
      uint8_t format;       // pixel format identifier of the encoder
      uint8_t blockwidth;   // BLOCK_WIDTH
      uint8_t blockheight;  // BLOCK_HEIGHT
  };
  // The header is written at an odd offset (output + 1) and its size is added
  // to the write position, so it must be byte-packed and byte-aligned.
  static_assert(sizeof(KeyframeHeader) == 6, "KeyframeHeader must not contain padding");
  static_assert(alignof(KeyframeHeader) == 1, "KeyframeHeader must be byte-aligned");
  55. static inline void writePixel(
  56. const PixelOperations<uint16_t>& pixelOps,
  57. uint16_t pixel, Endian::L16& dest)
  58. {
  59. unsigned r = pixelOps.red256(pixel);
  60. unsigned g = pixelOps.green256(pixel);
  61. unsigned b = pixelOps.blue256(pixel);
  62. dest = ((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3);
  63. }
  64. static inline void writePixel(
  65. const PixelOperations<unsigned>& pixelOps,
  66. unsigned pixel, Endian::L32& dest)
  67. {
  68. unsigned r = pixelOps.red256(pixel);
  69. unsigned g = pixelOps.green256(pixel);
  70. unsigned b = pixelOps.blue256(pixel);
  71. dest = (r << 16) | (g << 8) | b;
  72. }
  73. static void createVectorTable()
  74. {
  75. unsigned p = 0;
  76. // center
  77. vectorTable[p] = {0, 0};
  78. p += 1;
  79. // horizontal, vertial, diagonal
  80. for (int i = 1; i <= int(MAX_VECTOR); ++i) {
  81. vectorTable[p + 0] = { i, 0};
  82. vectorTable[p + 1] = {-i, 0};
  83. vectorTable[p + 2] = { 0, i};
  84. vectorTable[p + 3] = { 0,-i};
  85. vectorTable[p + 4] = { i, i};
  86. vectorTable[p + 5] = {-i, i};
  87. vectorTable[p + 6] = { i,-i};
  88. vectorTable[p + 7] = {-i,-i};
  89. p += 8;
  90. }
  91. // rest
  92. for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
  93. for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
  94. if (x == y) continue; // already have diagonal
  95. vectorTable[p + 0] = { x, y};
  96. vectorTable[p + 1] = {-x, y};
  97. vectorTable[p + 2] = { x,-y};
  98. vectorTable[p + 3] = {-x,-y};
  99. p += 4;
  100. }
  101. }
  102. assert(p == VECTOR_TAB_SIZE);
  103. ranges::sort(vectorTable);
  104. }
  105. ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
  106. : width(width_)
  107. , height(height_)
  108. {
  109. setupBuffers(bpp);
  110. createVectorTable();
  111. memset(&zstream, 0, sizeof(zstream));
  112. deflateInit(&zstream, 6); // compression level
  113. // I did a small test: compression level vs compression speed
  114. // (recorded Space Manbow intro, video only)
  115. //
  116. // level | time | size
  117. // ------+--------+----------
  118. // 0 | 1m12.6 | 139442594
  119. // 1 | 1m12.1 | 5217288
  120. // 2 | 1m10.8 | 4887258
  121. // 3 | 1m11.8 | 4610668
  122. // 4 | 1m13.1 | 3791932 <-- old default
  123. // 5 | 1m14.2 | 3602078
  124. // 6 | 1m14.5 | 3363766 <-- current default
  125. // 7 | 1m15.8 | 3333938
  126. // 8 | 1m25.0 | 3301168
  127. // 9 | 2m04.1 | 3253706
  128. //
  129. // Level 6 seems a good compromise between size/speed for THIS test.
  130. }
  131. void ZMBVEncoder::setupBuffers(unsigned bpp)
  132. {
  133. switch (bpp) {
  134. #if HAVE_16BPP
  135. case 15:
  136. case 16:
  137. format = ZMBV_FORMAT_16BPP;
  138. pixelSize = 2;
  139. break;
  140. #endif
  141. #if HAVE_32BPP
  142. case 32:
  143. format = ZMBV_FORMAT_32BPP;
  144. pixelSize = 4;
  145. break;
  146. #endif
  147. default:
  148. UNREACHABLE;
  149. }
  150. pitch = width + 2 * MAX_VECTOR;
  151. unsigned bufsize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
  152. oldframe.resize(bufsize);
  153. newframe.resize(bufsize);
  154. memset(oldframe.data(), 0, bufsize);
  155. memset(newframe.data(), 0, bufsize);
  156. work.resize(bufsize);
  157. outputSize = neededSize();
  158. output.resize(outputSize);
  159. assert((width % BLOCK_WIDTH ) == 0);
  160. assert((height % BLOCK_HEIGHT) == 0);
  161. unsigned xblocks = width / BLOCK_WIDTH;
  162. unsigned yblocks = height / BLOCK_HEIGHT;
  163. blockOffsets.resize(xblocks * yblocks);
  164. for (unsigned y = 0; y < yblocks; ++y) {
  165. for (unsigned x = 0; x < xblocks; ++x) {
  166. blockOffsets[y * xblocks + x] =
  167. ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
  168. (x * BLOCK_WIDTH) + MAX_VECTOR;
  169. }
  170. }
  171. }
  172. unsigned ZMBVEncoder::neededSize() const
  173. {
  174. unsigned f = pixelSize;
  175. f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
  176. return f + f / 1000;
  177. }
  178. template<class P>
  179. unsigned ZMBVEncoder::possibleBlock(int vx, int vy, unsigned offset)
  180. {
  181. int ret = 0;
  182. auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
  183. auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
  184. for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
  185. for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
  186. if (pold[x] != pnew[x]) ++ret;
  187. }
  188. pold += pitch * 4;
  189. pnew += pitch * 4;
  190. }
  191. return ret;
  192. }
  193. template<class P>
  194. unsigned ZMBVEncoder::compareBlock(int vx, int vy, unsigned offset)
  195. {
  196. int ret = 0;
  197. auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
  198. auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
  199. for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
  200. for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
  201. if (pold[x] != pnew[x]) ++ret;
  202. }
  203. pold += pitch;
  204. pnew += pitch;
  205. }
  206. return ret;
  207. }
  208. template<class P>
  209. void ZMBVEncoder::addXorBlock(
  210. const PixelOperations<P>& pixelOps, int vx, int vy, unsigned offset, unsigned& workUsed)
  211. {
  212. using LE_P = typename Endian::Little<P>::type;
  213. auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
  214. auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
  215. for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
  216. for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
  217. P pxor = pnew[x] ^ pold[x];
  218. writePixel(pixelOps, pxor, *reinterpret_cast<LE_P*>(&work[workUsed]));
  219. workUsed += sizeof(P);
  220. }
  221. pold += pitch;
  222. pnew += pitch;
  223. }
  224. }
  225. template<class P>
  226. void ZMBVEncoder::addXorFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
  227. {
  228. PixelOperations<P> pixelOps(pixelFormat);
  229. auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
  230. unsigned xblocks = width / BLOCK_WIDTH;
  231. unsigned yblocks = height / BLOCK_HEIGHT;
  232. unsigned blockcount = xblocks * yblocks;
  233. // Align the following xor data on 4 byte boundary
  234. workUsed = (workUsed + blockcount * 2 + 3) & ~3;
  235. int bestvx = 0;
  236. int bestvy = 0;
  237. for (unsigned b = 0; b < blockcount; ++b) {
  238. unsigned offset = blockOffsets[b];
  239. // first try best vector of previous block
  240. unsigned bestchange = compareBlock<P>(bestvx, bestvy, offset);
  241. if (bestchange >= 4) {
  242. int possibles = 64;
  243. for (auto& v : vectorTable) {
  244. if (possibleBlock<P>(v.x, v.y, offset) < 4) {
  245. unsigned testchange = compareBlock<P>(v.x, v.y, offset);
  246. if (testchange < bestchange) {
  247. bestchange = testchange;
  248. bestvx = v.x;
  249. bestvy = v.y;
  250. if (bestchange < 4) break;
  251. }
  252. --possibles;
  253. if (possibles == 0) break;
  254. }
  255. }
  256. }
  257. vectors[b * 2 + 0] = (bestvx << 1);
  258. vectors[b * 2 + 1] = (bestvy << 1);
  259. if (bestchange) {
  260. vectors[b * 2 + 0] |= 1;
  261. addXorBlock<P>(pixelOps, bestvx, bestvy, offset, workUsed);
  262. }
  263. }
  264. }
  265. template<class P>
  266. void ZMBVEncoder::addFullFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
  267. {
  268. using LE_P = typename Endian::Little<P>::type;
  269. PixelOperations<P> pixelOps(pixelFormat);
  270. auto* readFrame =
  271. &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
  272. for (unsigned y = 0; y < height; ++y) {
  273. auto* pixelsIn = reinterpret_cast<P*> (readFrame);
  274. auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
  275. for (unsigned x = 0; x < width; ++x) {
  276. writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
  277. }
  278. readFrame += pitch * sizeof(P);
  279. workUsed += width * sizeof(P);
  280. }
  281. }
  282. const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_) const
  283. {
  284. #if HAVE_32BPP
  285. if (pixelSize == 4) { // 32bpp
  286. auto* workBuf = static_cast<uint32_t*>(workBuf_);
  287. switch (height) {
  288. case 240:
  289. return frame->getLinePtr320_240(y, workBuf);
  290. case 480:
  291. return frame->getLinePtr640_480(y, workBuf);
  292. case 720:
  293. return frame->getLinePtr960_720(y, workBuf);
  294. default:
  295. UNREACHABLE;
  296. }
  297. }
  298. #endif
  299. #if HAVE_16BPP
  300. if (pixelSize == 2) { // 15bpp or 16bpp
  301. auto* workBuf = static_cast<uint16_t*>(workBuf_);
  302. switch (height) {
  303. case 240:
  304. return frame->getLinePtr320_240(y, workBuf);
  305. case 480:
  306. return frame->getLinePtr640_480(y, workBuf);
  307. case 720:
  308. return frame->getLinePtr960_720(y, workBuf);
  309. default:
  310. UNREACHABLE;
  311. }
  312. }
  313. #endif
  314. UNREACHABLE;
  315. return nullptr; // avoid warning
  316. }
  317. void ZMBVEncoder::compressFrame(bool keyFrame, FrameSource* frame,
  318. void*& buffer, unsigned& written)
  319. {
  320. std::swap(newframe, oldframe); // replace oldframe with newframe
  321. // Reset the work buffer
  322. unsigned workUsed = 0;
  323. unsigned writeDone = 1;
  324. uint8_t* writeBuf = output.data();
  325. output[0] = 0; // first byte contains info about this frame
  326. if (keyFrame) {
  327. output[0] |= FLAG_KEYFRAME;
  328. auto* header = reinterpret_cast<KeyframeHeader*>(
  329. writeBuf + writeDone);
  330. header->high_version = DBZV_VERSION_HIGH;
  331. header->low_version = DBZV_VERSION_LOW;
  332. header->compression = COMPRESSION_ZLIB;
  333. header->format = format;
  334. header->blockwidth = BLOCK_WIDTH;
  335. header->blockheight = BLOCK_HEIGHT;
  336. writeDone += sizeof(KeyframeHeader);
  337. deflateReset(&zstream); // restart deflate
  338. }
  339. // copy lines (to add black border)
  340. unsigned linePitch = pitch * pixelSize;
  341. unsigned lineWidth = width * pixelSize;
  342. uint8_t* dest =
  343. &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
  344. for (unsigned i = 0; i < height; ++i) {
  345. auto* scaled = getScaledLine(frame, i, dest);
  346. if (scaled != dest) memcpy(dest, scaled, lineWidth);
  347. dest += linePitch;
  348. }
  349. // Add the frame data.
  350. if (keyFrame) {
  351. // Key frame: full frame data.
  352. switch (pixelSize) {
  353. #if HAVE_16BPP
  354. case 2:
  355. addFullFrame<uint16_t>(frame->getPixelFormat(), workUsed);
  356. break;
  357. #endif
  358. #if HAVE_32BPP
  359. case 4:
  360. addFullFrame<uint32_t>(frame->getPixelFormat(), workUsed);
  361. break;
  362. #endif
  363. default:
  364. UNREACHABLE;
  365. }
  366. } else {
  367. // Non-key frame: delta frame data.
  368. switch (pixelSize) {
  369. #if HAVE_16BPP
  370. case 2:
  371. addXorFrame<uint16_t>(frame->getPixelFormat(), workUsed);
  372. break;
  373. #endif
  374. #if HAVE_32BPP
  375. case 4:
  376. addXorFrame<uint32_t>(frame->getPixelFormat(), workUsed);
  377. break;
  378. #endif
  379. default:
  380. UNREACHABLE;
  381. }
  382. }
  383. // Compress the frame data with zlib.
  384. zstream.next_in = work.data();
  385. zstream.avail_in = workUsed;
  386. zstream.total_in = 0;
  387. zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
  388. zstream.avail_out = outputSize - writeDone;
  389. zstream.total_out = 0;
  390. auto r = deflate(&zstream, Z_SYNC_FLUSH);
  391. assert(r == Z_OK); (void)r;
  392. buffer = output.data();
  393. written = writeDone + zstream.total_out;
  394. }
  395. } // namespace openmsx