123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433 |
- // Code based on DOSBox-0.65
- #include "ZMBVEncoder.hh"
- #include "FrameSource.hh"
- #include "PixelOperations.hh"
- #include "endian.hh"
- #include "ranges.hh"
- #include "unreachable.hh"
- #include <cassert>
- #include <cstdlib>
- #include <cstring>
- #include <cmath>
- namespace openmsx {
- constexpr uint8_t DBZV_VERSION_HIGH = 0;
- constexpr uint8_t DBZV_VERSION_LOW = 1;
- constexpr uint8_t COMPRESSION_ZLIB = 1;
- constexpr unsigned MAX_VECTOR = 16;
- constexpr unsigned BLOCK_WIDTH = MAX_VECTOR;
- constexpr unsigned BLOCK_HEIGHT = MAX_VECTOR;
- constexpr unsigned FLAG_KEYFRAME = 0x01;
- struct CodecVector {
- float cost() const {
- float c = sqrtf(float(x * x + y * y));
- if ((x == 0) || (y == 0)) {
- // no penalty for purely horizontal/vertical offset
- c *= 1.0f;
- } else if (abs(x) == abs(y)) {
- // small penalty for pure diagonal
- c *= 2.0f;
- } else {
- // bigger penalty for 'random' direction
- c *= 4.0f;
- }
- return c;
- }
- int x;
- int y;
- };
- static inline bool operator<(const CodecVector& l, const CodecVector& r)
- {
- return l.cost() < r.cost();
- }
- constexpr unsigned VECTOR_TAB_SIZE =
- 1 + // center
- 8 * MAX_VECTOR + // horizontal, vertial, diagonal
- MAX_VECTOR * MAX_VECTOR - 2 * MAX_VECTOR; // rest (only MAX_VECTOR/2)
- CodecVector vectorTable[VECTOR_TAB_SIZE];
- struct KeyframeHeader {
- uint8_t high_version;
- uint8_t low_version;
- uint8_t compression;
- uint8_t format;
- uint8_t blockwidth;
- uint8_t blockheight;
- };
- static inline void writePixel(
- const PixelOperations<uint16_t>& pixelOps,
- uint16_t pixel, Endian::L16& dest)
- {
- unsigned r = pixelOps.red256(pixel);
- unsigned g = pixelOps.green256(pixel);
- unsigned b = pixelOps.blue256(pixel);
- dest = ((r & 0xF8) << (11 - 3)) | ((g & 0xFC) << (5 - 2)) | (b >> 3);
- }
- static inline void writePixel(
- const PixelOperations<unsigned>& pixelOps,
- unsigned pixel, Endian::L32& dest)
- {
- unsigned r = pixelOps.red256(pixel);
- unsigned g = pixelOps.green256(pixel);
- unsigned b = pixelOps.blue256(pixel);
- dest = (r << 16) | (g << 8) | b;
- }
- static void createVectorTable()
- {
- unsigned p = 0;
- // center
- vectorTable[p] = {0, 0};
- p += 1;
- // horizontal, vertial, diagonal
- for (int i = 1; i <= int(MAX_VECTOR); ++i) {
- vectorTable[p + 0] = { i, 0};
- vectorTable[p + 1] = {-i, 0};
- vectorTable[p + 2] = { 0, i};
- vectorTable[p + 3] = { 0,-i};
- vectorTable[p + 4] = { i, i};
- vectorTable[p + 5] = {-i, i};
- vectorTable[p + 6] = { i,-i};
- vectorTable[p + 7] = {-i,-i};
- p += 8;
- }
- // rest
- for (int y = 1; y <= int(MAX_VECTOR / 2); ++y) {
- for (int x = 1; x <= int(MAX_VECTOR / 2); ++x) {
- if (x == y) continue; // already have diagonal
- vectorTable[p + 0] = { x, y};
- vectorTable[p + 1] = {-x, y};
- vectorTable[p + 2] = { x,-y};
- vectorTable[p + 3] = {-x,-y};
- p += 4;
- }
- }
- assert(p == VECTOR_TAB_SIZE);
- ranges::sort(vectorTable);
- }
- ZMBVEncoder::ZMBVEncoder(unsigned width_, unsigned height_, unsigned bpp)
- : width(width_)
- , height(height_)
- {
- setupBuffers(bpp);
- createVectorTable();
- memset(&zstream, 0, sizeof(zstream));
- deflateInit(&zstream, 6); // compression level
- // I did a small test: compression level vs compression speed
- // (recorded Space Manbow intro, video only)
- //
- // level | time | size
- // ------+--------+----------
- // 0 | 1m12.6 | 139442594
- // 1 | 1m12.1 | 5217288
- // 2 | 1m10.8 | 4887258
- // 3 | 1m11.8 | 4610668
- // 4 | 1m13.1 | 3791932 <-- old default
- // 5 | 1m14.2 | 3602078
- // 6 | 1m14.5 | 3363766 <-- current default
- // 7 | 1m15.8 | 3333938
- // 8 | 1m25.0 | 3301168
- // 9 | 2m04.1 | 3253706
- //
- // Level 6 seems a good compromise between size/speed for THIS test.
- }
- void ZMBVEncoder::setupBuffers(unsigned bpp)
- {
- switch (bpp) {
- #if HAVE_16BPP
- case 15:
- case 16:
- format = ZMBV_FORMAT_16BPP;
- pixelSize = 2;
- break;
- #endif
- #if HAVE_32BPP
- case 32:
- format = ZMBV_FORMAT_32BPP;
- pixelSize = 4;
- break;
- #endif
- default:
- UNREACHABLE;
- }
- pitch = width + 2 * MAX_VECTOR;
- unsigned bufsize = (height + 2 * MAX_VECTOR) * pitch * pixelSize + 2048;
- oldframe.resize(bufsize);
- newframe.resize(bufsize);
- memset(oldframe.data(), 0, bufsize);
- memset(newframe.data(), 0, bufsize);
- work.resize(bufsize);
- outputSize = neededSize();
- output.resize(outputSize);
- assert((width % BLOCK_WIDTH ) == 0);
- assert((height % BLOCK_HEIGHT) == 0);
- unsigned xblocks = width / BLOCK_WIDTH;
- unsigned yblocks = height / BLOCK_HEIGHT;
- blockOffsets.resize(xblocks * yblocks);
- for (unsigned y = 0; y < yblocks; ++y) {
- for (unsigned x = 0; x < xblocks; ++x) {
- blockOffsets[y * xblocks + x] =
- ((y * BLOCK_HEIGHT) + MAX_VECTOR) * pitch +
- (x * BLOCK_WIDTH) + MAX_VECTOR;
- }
- }
- }
- unsigned ZMBVEncoder::neededSize() const
- {
- unsigned f = pixelSize;
- f = f * width * height + 2 * (1 + (width / 8)) * (1 + (height / 8)) + 1024;
- return f + f / 1000;
- }
- template<class P>
- unsigned ZMBVEncoder::possibleBlock(int vx, int vy, unsigned offset)
- {
- int ret = 0;
- auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
- auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
- for (unsigned y = 0; y < BLOCK_HEIGHT; y += 4) {
- for (unsigned x = 0; x < BLOCK_WIDTH; x += 4) {
- if (pold[x] != pnew[x]) ++ret;
- }
- pold += pitch * 4;
- pnew += pitch * 4;
- }
- return ret;
- }
- template<class P>
- unsigned ZMBVEncoder::compareBlock(int vx, int vy, unsigned offset)
- {
- int ret = 0;
- auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
- auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
- for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
- for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
- if (pold[x] != pnew[x]) ++ret;
- }
- pold += pitch;
- pnew += pitch;
- }
- return ret;
- }
- template<class P>
- void ZMBVEncoder::addXorBlock(
- const PixelOperations<P>& pixelOps, int vx, int vy, unsigned offset, unsigned& workUsed)
- {
- using LE_P = typename Endian::Little<P>::type;
- auto* pold = &(reinterpret_cast<P*>(oldframe.data()))[offset + (vy * pitch) + vx];
- auto* pnew = &(reinterpret_cast<P*>(newframe.data()))[offset];
- for (unsigned y = 0; y < BLOCK_HEIGHT; ++y) {
- for (unsigned x = 0; x < BLOCK_WIDTH; ++x) {
- P pxor = pnew[x] ^ pold[x];
- writePixel(pixelOps, pxor, *reinterpret_cast<LE_P*>(&work[workUsed]));
- workUsed += sizeof(P);
- }
- pold += pitch;
- pnew += pitch;
- }
- }
- template<class P>
- void ZMBVEncoder::addXorFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
- {
- PixelOperations<P> pixelOps(pixelFormat);
- auto* vectors = reinterpret_cast<int8_t*>(&work[workUsed]);
- unsigned xblocks = width / BLOCK_WIDTH;
- unsigned yblocks = height / BLOCK_HEIGHT;
- unsigned blockcount = xblocks * yblocks;
- // Align the following xor data on 4 byte boundary
- workUsed = (workUsed + blockcount * 2 + 3) & ~3;
- int bestvx = 0;
- int bestvy = 0;
- for (unsigned b = 0; b < blockcount; ++b) {
- unsigned offset = blockOffsets[b];
- // first try best vector of previous block
- unsigned bestchange = compareBlock<P>(bestvx, bestvy, offset);
- if (bestchange >= 4) {
- int possibles = 64;
- for (auto& v : vectorTable) {
- if (possibleBlock<P>(v.x, v.y, offset) < 4) {
- unsigned testchange = compareBlock<P>(v.x, v.y, offset);
- if (testchange < bestchange) {
- bestchange = testchange;
- bestvx = v.x;
- bestvy = v.y;
- if (bestchange < 4) break;
- }
- --possibles;
- if (possibles == 0) break;
- }
- }
- }
- vectors[b * 2 + 0] = (bestvx << 1);
- vectors[b * 2 + 1] = (bestvy << 1);
- if (bestchange) {
- vectors[b * 2 + 0] |= 1;
- addXorBlock<P>(pixelOps, bestvx, bestvy, offset, workUsed);
- }
- }
- }
- template<class P>
- void ZMBVEncoder::addFullFrame(const PixelFormat& pixelFormat, unsigned& workUsed)
- {
- using LE_P = typename Endian::Little<P>::type;
- PixelOperations<P> pixelOps(pixelFormat);
- auto* readFrame =
- &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
- for (unsigned y = 0; y < height; ++y) {
- auto* pixelsIn = reinterpret_cast<P*> (readFrame);
- auto* pixelsOut = reinterpret_cast<LE_P*>(&work[workUsed]);
- for (unsigned x = 0; x < width; ++x) {
- writePixel(pixelOps, pixelsIn[x], pixelsOut[x]);
- }
- readFrame += pitch * sizeof(P);
- workUsed += width * sizeof(P);
- }
- }
- const void* ZMBVEncoder::getScaledLine(FrameSource* frame, unsigned y, void* workBuf_) const
- {
- #if HAVE_32BPP
- if (pixelSize == 4) { // 32bpp
- auto* workBuf = static_cast<uint32_t*>(workBuf_);
- switch (height) {
- case 240:
- return frame->getLinePtr320_240(y, workBuf);
- case 480:
- return frame->getLinePtr640_480(y, workBuf);
- case 720:
- return frame->getLinePtr960_720(y, workBuf);
- default:
- UNREACHABLE;
- }
- }
- #endif
- #if HAVE_16BPP
- if (pixelSize == 2) { // 15bpp or 16bpp
- auto* workBuf = static_cast<uint16_t*>(workBuf_);
- switch (height) {
- case 240:
- return frame->getLinePtr320_240(y, workBuf);
- case 480:
- return frame->getLinePtr640_480(y, workBuf);
- case 720:
- return frame->getLinePtr960_720(y, workBuf);
- default:
- UNREACHABLE;
- }
- }
- #endif
- UNREACHABLE;
- return nullptr; // avoid warning
- }
- void ZMBVEncoder::compressFrame(bool keyFrame, FrameSource* frame,
- void*& buffer, unsigned& written)
- {
- std::swap(newframe, oldframe); // replace oldframe with newframe
- // Reset the work buffer
- unsigned workUsed = 0;
- unsigned writeDone = 1;
- uint8_t* writeBuf = output.data();
- output[0] = 0; // first byte contains info about this frame
- if (keyFrame) {
- output[0] |= FLAG_KEYFRAME;
- auto* header = reinterpret_cast<KeyframeHeader*>(
- writeBuf + writeDone);
- header->high_version = DBZV_VERSION_HIGH;
- header->low_version = DBZV_VERSION_LOW;
- header->compression = COMPRESSION_ZLIB;
- header->format = format;
- header->blockwidth = BLOCK_WIDTH;
- header->blockheight = BLOCK_HEIGHT;
- writeDone += sizeof(KeyframeHeader);
- deflateReset(&zstream); // restart deflate
- }
- // copy lines (to add black border)
- unsigned linePitch = pitch * pixelSize;
- unsigned lineWidth = width * pixelSize;
- uint8_t* dest =
- &newframe[pixelSize * (MAX_VECTOR + MAX_VECTOR * pitch)];
- for (unsigned i = 0; i < height; ++i) {
- auto* scaled = getScaledLine(frame, i, dest);
- if (scaled != dest) memcpy(dest, scaled, lineWidth);
- dest += linePitch;
- }
- // Add the frame data.
- if (keyFrame) {
- // Key frame: full frame data.
- switch (pixelSize) {
- #if HAVE_16BPP
- case 2:
- addFullFrame<uint16_t>(frame->getPixelFormat(), workUsed);
- break;
- #endif
- #if HAVE_32BPP
- case 4:
- addFullFrame<uint32_t>(frame->getPixelFormat(), workUsed);
- break;
- #endif
- default:
- UNREACHABLE;
- }
- } else {
- // Non-key frame: delta frame data.
- switch (pixelSize) {
- #if HAVE_16BPP
- case 2:
- addXorFrame<uint16_t>(frame->getPixelFormat(), workUsed);
- break;
- #endif
- #if HAVE_32BPP
- case 4:
- addXorFrame<uint32_t>(frame->getPixelFormat(), workUsed);
- break;
- #endif
- default:
- UNREACHABLE;
- }
- }
- // Compress the frame data with zlib.
- zstream.next_in = work.data();
- zstream.avail_in = workUsed;
- zstream.total_in = 0;
- zstream.next_out = static_cast<Bytef*>(writeBuf + writeDone);
- zstream.avail_out = outputSize - writeDone;
- zstream.total_out = 0;
- auto r = deflate(&zstream, Z_SYNC_FLUSH);
- assert(r == Z_OK); (void)r;
- buffer = output.data();
- written = writeDone + zstream.total_out;
- }
- } // namespace openmsx
|