serialize.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. #include "serialize.hh"
  2. #include "Base64.hh"
  3. #include "HexDump.hh"
  4. #include "XMLLoader.hh"
  5. #include "XMLElement.hh"
  6. #include "ConfigException.hh"
  7. #include "XMLException.hh"
  8. #include "DeltaBlock.hh"
  9. #include "MemBuffer.hh"
  10. #include "FileOperations.hh"
  11. #include "Version.hh"
  12. #include "Date.hh"
  13. #include "one_of.hh"
  14. #include "stl.hh"
  15. #include "build-info.hh"
  16. #include "cstdiop.hh" // for dup()
  17. #include <cstring>
  18. #include <iostream>
  19. #include <limits>
  20. using std::string;
  21. using std::string_view;
  22. namespace openmsx {
  23. template<typename Derived>
  24. void ArchiveBase<Derived>::attribute(const char* name, const char* value)
  25. {
  26. string valueStr(value);
  27. self().attribute(name, valueStr);
  28. }
  29. template class ArchiveBase<MemOutputArchive>;
  30. template class ArchiveBase<XmlOutputArchive>;
  31. ////
  32. unsigned OutputArchiveBase2::generateID1(const void* p)
  33. {
  34. #ifdef linux
  35. assert("Can't serialize ID of object located on the stack" &&
  36. !addressOnStack(p));
  37. #endif
  38. ++lastId;
  39. assert(!polyIdMap.contains(p));
  40. polyIdMap.emplace_noDuplicateCheck(p, lastId);
  41. return lastId;
  42. }
  43. unsigned OutputArchiveBase2::generateID2(
  44. const void* p, const std::type_info& typeInfo)
  45. {
  46. #ifdef linux
  47. assert("Can't serialize ID of object located on the stack" &&
  48. !addressOnStack(p));
  49. #endif
  50. ++lastId;
  51. auto key = std::pair(p, std::type_index(typeInfo));
  52. assert(!idMap.contains(key));
  53. idMap.emplace_noDuplicateCheck(key, lastId);
  54. return lastId;
  55. }
  56. unsigned OutputArchiveBase2::getID1(const void* p)
  57. {
  58. auto v = lookup(polyIdMap, p);
  59. return v ? *v : 0;
  60. }
  61. unsigned OutputArchiveBase2::getID2(
  62. const void* p, const std::type_info& typeInfo)
  63. {
  64. auto v = lookup(idMap, std::pair(p, std::type_index(typeInfo)));
  65. return v ? *v : 0;
  66. }
  67. template<typename Derived>
  68. void OutputArchiveBase<Derived>::serialize_blob(
  69. const char* tag, const void* data_, size_t len, bool /*diff*/)
  70. {
  71. auto* data = static_cast<const uint8_t*>(data_);
  72. string encoding;
  73. string tmp;
  74. if (false) {
  75. // useful for debugging
  76. encoding = "hex";
  77. tmp = HexDump::encode(data, len);
  78. } else if (false) {
  79. encoding = "base64";
  80. tmp = Base64::encode(data, len);
  81. } else {
  82. encoding = "gz-base64";
  83. // TODO check for overflow?
  84. auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
  85. MemBuffer<uint8_t> buf(dstLen);
  86. if (compress2(buf.data(), &dstLen,
  87. reinterpret_cast<const Bytef*>(data),
  88. uLong(len), 9)
  89. != Z_OK) {
  90. throw MSXException("Error while compressing blob.");
  91. }
  92. tmp = Base64::encode(buf.data(), dstLen);
  93. }
  94. this->self().beginTag(tag);
  95. this->self().attribute("encoding", encoding);
  96. Saver<string> saver;
  97. saver(this->self(), tmp, false);
  98. this->self().endTag(tag);
  99. }
  100. template class OutputArchiveBase<MemOutputArchive>;
  101. template class OutputArchiveBase<XmlOutputArchive>;
  102. ////
  103. void* InputArchiveBase2::getPointer(unsigned id)
  104. {
  105. auto v = lookup(idMap, id);
  106. return v ? *v : nullptr;
  107. }
  108. void InputArchiveBase2::addPointer(unsigned id, const void* p)
  109. {
  110. assert(!idMap.contains(id));
  111. idMap.emplace_noDuplicateCheck(id, const_cast<void*>(p));
  112. }
  113. unsigned InputArchiveBase2::getId(const void* ptr) const
  114. {
  115. for (const auto& [id, pt] : idMap) {
  116. if (pt == ptr) return id;
  117. }
  118. return 0;
  119. }
  120. template<typename Derived>
  121. void InputArchiveBase<Derived>::serialize_blob(
  122. const char* tag, void* data, size_t len, bool /*diff*/)
  123. {
  124. this->self().beginTag(tag);
  125. string encoding;
  126. this->self().attribute("encoding", encoding);
  127. string_view tmp = this->self().loadStr();
  128. this->self().endTag(tag);
  129. if (encoding == "gz-base64") {
  130. auto [buf, bufSize] = Base64::decode(tmp);
  131. auto dstLen = uLongf(len); // TODO check for overflow?
  132. if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
  133. reinterpret_cast<const Bytef*>(buf.data()), uLong(bufSize))
  134. != Z_OK) ||
  135. (dstLen != len)) {
  136. throw MSXException("Error while decompressing blob.");
  137. }
  138. } else if (encoding == one_of("hex", "base64")) {
  139. bool ok = (encoding == "hex")
  140. ? HexDump::decode_inplace(tmp, static_cast<uint8_t*>(data), len)
  141. : Base64 ::decode_inplace(tmp, static_cast<uint8_t*>(data), len);
  142. if (!ok) {
  143. throw XMLException(
  144. "Length of decoded blob different from "
  145. "expected value (", len, ')');
  146. }
  147. } else {
  148. throw XMLException("Unsupported encoding \"", encoding, "\" for blob");
  149. }
  150. }
  151. template class InputArchiveBase<MemInputArchive>;
  152. template class InputArchiveBase<XmlInputArchive>;
  153. ////
  154. void MemOutputArchive::save(const std::string& s)
  155. {
  156. auto size = s.size();
  157. uint8_t* buf = buffer.allocate(sizeof(size) + size);
  158. memcpy(buf, &size, sizeof(size));
  159. memcpy(buf + sizeof(size), s.data(), size);
  160. }
  161. MemBuffer<uint8_t> MemOutputArchive::releaseBuffer(size_t& size)
  162. {
  163. return buffer.release(size);
  164. }
  165. ////
  166. void MemInputArchive::load(std::string& s)
  167. {
  168. size_t length;
  169. load(length);
  170. s.resize(length);
  171. if (length) {
  172. get(&s[0], length);
  173. }
  174. }
  175. string_view MemInputArchive::loadStr()
  176. {
  177. size_t length;
  178. load(length);
  179. const uint8_t* p = buffer.getCurrentPos();
  180. buffer.skip(length);
  181. return string_view(reinterpret_cast<const char*>(p), length);
  182. }
  183. ////
  // Threshold between "small" and "large" blobs for the in-memory archives.
  // Very small inputs compress poorly (the compressed size is often even
  // bigger than the input) and compression has a relatively large setup
  // time. The value is semi-arbitrary; it was only made >= 52 so that the
  // (incompressible) RP5C01 registers won't be compressed.
  constexpr size_t SMALL_SIZE{64};
  190. void MemOutputArchive::serialize_blob(const char* /*tag*/, const void* data,
  191. size_t len, bool diff)
  192. {
  193. // Delta-compress in-memory blobs, see DeltaBlock.hh for more details.
  194. if (len > SMALL_SIZE) {
  195. auto deltaBlockIdx = unsigned(deltaBlocks.size());
  196. save(deltaBlockIdx); // see comment below in MemInputArchive
  197. deltaBlocks.push_back(diff
  198. ? lastDeltaBlocks.createNew(
  199. data, static_cast<const uint8_t*>(data), len)
  200. : lastDeltaBlocks.createNullDiff(
  201. data, static_cast<const uint8_t*>(data), len));
  202. } else {
  203. uint8_t* buf = buffer.allocate(len);
  204. memcpy(buf, data, len);
  205. }
  206. }
  207. void MemInputArchive::serialize_blob(const char* /*tag*/, void* data,
  208. size_t len, bool /*diff*/)
  209. {
  210. if (len > SMALL_SIZE) {
  211. // Usually blobs are saved in the same order as they are loaded
  212. // (via the serialize_blob() methods in respectively
  213. // MemOutputArchive and MemInputArchive). In that case keeping
  214. // track of the deltaBlockIdx in the savestate itself is
  215. // redundant (it will simply be an increasing value). However
  216. // in rare cases, via the {begin,end,skip)Section() methods, it
  217. // is possible that certain blobs are stored in the savestate,
  218. // but skipped while loading. That's why we do need the index.
  219. unsigned deltaBlockIdx; load(deltaBlockIdx);
  220. deltaBlocks[deltaBlockIdx]->apply(static_cast<uint8_t*>(data), len);
  221. } else {
  222. memcpy(data, buffer.getCurrentPos(), len);
  223. buffer.skip(len);
  224. }
  225. }
  226. ////
  227. XmlOutputArchive::XmlOutputArchive(const string& filename)
  228. : root("serial")
  229. {
  230. root.addAttribute("openmsx_version", Version::full());
  231. root.addAttribute("date_time", Date::toString(time(nullptr)));
  232. root.addAttribute("platform", TARGET_PLATFORM);
  233. {
  234. auto f = FileOperations::openFile(filename, "wb");
  235. if (!f) goto error;
  236. int duped_fd = dup(fileno(f.get()));
  237. if (duped_fd == -1) goto error;
  238. file = gzdopen(duped_fd, "wb9");
  239. if (!file) {
  240. ::close(duped_fd);
  241. goto error;
  242. }
  243. current.push_back(&root);
  244. return; // success
  245. // on scope-exit 'File* f' is closed, and 'gzFile file'
  246. // uses the dup()'ed file descriptor.
  247. }
  248. error:
  249. throw XMLException("Could not open compressed file \"", filename, "\"");
  250. }
  251. void XmlOutputArchive::close()
  252. {
  253. if (!file) return; // already closed
  254. assert(current.back() == &root);
  255. const char* header =
  256. "<?xml version=\"1.0\" ?>\n"
  257. "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
  258. string dump = root.dump();
  259. if ((gzwrite(file, const_cast<char*>(header), unsigned(strlen(header))) == 0) ||
  260. (gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size())) == 0) ||
  261. (gzclose(file) != Z_OK)) {
  262. throw XMLException("Could not write savestate file.");
  263. }
  264. file = nullptr;
  265. }
  266. XmlOutputArchive::~XmlOutputArchive()
  267. {
  268. try {
  269. close();
  270. } catch (...) {
  271. // Eat exception. Explicitly call close() if you want to handle errors.
  272. }
  273. }
  274. void XmlOutputArchive::saveChar(char c)
  275. {
  276. save(string(1, c));
  277. }
  278. void XmlOutputArchive::save(const string& str)
  279. {
  280. assert(!current.empty());
  281. assert(current.back()->getData().empty());
  282. current.back()->setData(str);
  283. }
  284. void XmlOutputArchive::save(bool b)
  285. {
  286. assert(!current.empty());
  287. assert(current.back()->getData().empty());
  288. current.back()->setData(b ? "true" : "false");
  289. }
  290. void XmlOutputArchive::save(unsigned char b)
  291. {
  292. save(unsigned(b));
  293. }
  294. void XmlOutputArchive::save(signed char c)
  295. {
  296. save(int(c));
  297. }
  298. void XmlOutputArchive::save(char c)
  299. {
  300. save(int(c));
  301. }
  302. void XmlOutputArchive::save(int i)
  303. {
  304. saveImpl(i);
  305. }
  306. void XmlOutputArchive::save(unsigned u)
  307. {
  308. saveImpl(u);
  309. }
  310. void XmlOutputArchive::save(unsigned long long ull)
  311. {
  312. saveImpl(ull);
  313. }
  314. void XmlOutputArchive::attribute(const char* name, const string& str)
  315. {
  316. assert(!current.empty());
  317. assert(!current.back()->hasAttribute(name));
  318. current.back()->addAttribute(name, str);
  319. }
  320. void XmlOutputArchive::attribute(const char* name, int i)
  321. {
  322. attributeImpl(name, i);
  323. }
  324. void XmlOutputArchive::attribute(const char* name, unsigned u)
  325. {
  326. attributeImpl(name, u);
  327. }
  328. void XmlOutputArchive::beginTag(const char* tag)
  329. {
  330. assert(!current.empty());
  331. auto& elem = current.back()->addChild(tag);
  332. current.push_back(&elem);
  333. }
  334. void XmlOutputArchive::endTag(const char* tag)
  335. {
  336. assert(!current.empty());
  337. assert(current.back()->getName() == tag); (void)tag;
  338. current.pop_back();
  339. }
  340. ////
  341. XmlInputArchive::XmlInputArchive(const string& filename)
  342. : rootElem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
  343. {
  344. elems.emplace_back(&rootElem, 0);
  345. }
  346. string_view XmlInputArchive::loadStr()
  347. {
  348. if (!elems.back().first->getChildren().empty()) {
  349. throw XMLException("No child tags expected for primitive type");
  350. }
  351. return elems.back().first->getData();
  352. }
  353. void XmlInputArchive::load(string& t)
  354. {
  355. t = loadStr();
  356. }
  357. void XmlInputArchive::loadChar(char& c)
  358. {
  359. std::string str;
  360. load(str);
  361. std::istringstream is(str);
  362. is >> c;
  363. }
  364. void XmlInputArchive::load(bool& b)
  365. {
  366. string_view s = loadStr();
  367. if (s == one_of("true", "1")) {
  368. b = true;
  369. } else if (s == one_of("false", "0")) {
  370. b = false;
  371. } else {
  372. throw XMLException("Bad value found for boolean: ", s);
  373. }
  374. }
  375. // This function parses a number from a string. It's similar to the generic
  376. // templatized XmlInputArchive::load() method, but _much_ faster. It does
  377. // have some limitations though:
  378. // - it can't handle leading whitespace
  379. // - it can't handle extra characters at the end of the string
  380. // - it can only handle one base (only decimal, not octal or hexadecimal)
  381. // - it doesn't understand a leading '+' sign
  382. // - it doesn't detect overflow or underflow (The generic implementation sets
  383. // a 'bad' flag on the stream and clips the result to the min/max allowed
  384. // value. Though this 'bad' flag was ignored by the openMSX code).
  385. // This routine is only used to parse strings we've written ourselves (and the
  386. // savestate/replay XML files are not meant to be manually edited). So the
  387. // above limitations don't really matter. And we can use the speed gain.
  // Functor that negates 't' when 'negate' is set, but only for signed
  // types. For unsigned types 'negate' must be false (asserted) and 't' is
  // left untouched; the negation is never instantiated for them.
  template<bool IS_SIGNED> struct ConditionalNegate {
      template<typename T> void operator()(bool negate, T& t) {
          if constexpr (IS_SIGNED) {
              if (negate) t = -t; // ok to negate a signed type
          } else {
              assert(!negate); // can't negate unsigned type
              (void)negate;
              (void)t;
          }
      }
  };
  399. template<typename T> static inline void fastAtoi(string_view str, T& t)
  400. {
  401. t = 0;
  402. bool neg = false;
  403. size_t i = 0;
  404. size_t l = str.size();
  405. constexpr bool IS_SIGNED = std::numeric_limits<T>::is_signed;
  406. if (IS_SIGNED) {
  407. if (l == 0) return;
  408. if (str[0] == '-') {
  409. neg = true;
  410. i = 1;
  411. }
  412. }
  413. for (/**/; i < l; ++i) {
  414. unsigned d = str[i] - '0';
  415. if (unlikely(d > 9)) {
  416. throw XMLException("Invalid integer: ", str);
  417. }
  418. t = 10 * t + d;
  419. }
  420. // The following stuff does the equivalent of:
  421. // if (neg) t = -t;
  422. // Though this expression triggers a warning on VC++ when T is an
  423. // unsigned type. This complex template stuff avoids the warning.
  424. ConditionalNegate<IS_SIGNED> negateFunctor;
  425. negateFunctor(neg, t);
  426. }
  427. void XmlInputArchive::load(int& i)
  428. {
  429. string_view str = loadStr();
  430. fastAtoi(str, i);
  431. }
  432. void XmlInputArchive::load(unsigned& u)
  433. {
  434. string_view str = loadStr();
  435. fastAtoi(str, u);
  436. }
  437. void XmlInputArchive::load(unsigned long long& ull)
  438. {
  439. string_view str = loadStr();
  440. fastAtoi(str, ull);
  441. }
  442. void XmlInputArchive::load(unsigned char& b)
  443. {
  444. unsigned i;
  445. load(i);
  446. b = i;
  447. }
  448. void XmlInputArchive::load(signed char& c)
  449. {
  450. int i;
  451. load(i);
  452. c = i;
  453. }
  454. void XmlInputArchive::load(char& c)
  455. {
  456. int i;
  457. load(i);
  458. c = i;
  459. }
  460. void XmlInputArchive::beginTag(const char* tag)
  461. {
  462. auto* child = elems.back().first->findNextChild(
  463. tag, elems.back().second);
  464. if (!child) {
  465. string path;
  466. for (auto& e : elems) {
  467. strAppend(path, e.first->getName(), '/');
  468. }
  469. throw XMLException("No child tag \"", tag,
  470. "\" found at location \"", path, '\"');
  471. }
  472. elems.emplace_back(child, 0);
  473. }
  474. void XmlInputArchive::endTag(const char* tag)
  475. {
  476. const auto& elem = *elems.back().first;
  477. if (elem.getName() != tag) {
  478. throw XMLException("End tag \"", elem.getName(),
  479. "\" not equal to begin tag \"", tag, "\"");
  480. }
  481. auto& elem2 = const_cast<XMLElement&>(elem);
  482. elem2.clearName(); // mark this elem for later beginTag() calls
  483. elems.pop_back();
  484. }
  485. void XmlInputArchive::attribute(const char* name, string& t)
  486. {
  487. try {
  488. t = elems.back().first->getAttribute(name);
  489. } catch (ConfigException& e) {
  490. throw XMLException(std::move(e).getMessage());
  491. }
  492. }
  493. void XmlInputArchive::attribute(const char* name, int& i)
  494. {
  495. attributeImpl(name, i);
  496. }
  497. void XmlInputArchive::attribute(const char* name, unsigned& u)
  498. {
  499. attributeImpl(name, u);
  500. }
  501. bool XmlInputArchive::hasAttribute(const char* name)
  502. {
  503. return elems.back().first->hasAttribute(name);
  504. }
  505. bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
  506. {
  507. return elems.back().first->findAttributeInt(name, value);
  508. }
  509. int XmlInputArchive::countChildren() const
  510. {
  511. return int(elems.back().first->getChildren().size());
  512. }
  513. } // namespace openmsx