HtmlEntity.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. /*
  2. * Copyright 2005 - 2016 Zarafa and its licensors
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. *
  16. */
  17. #include <kopano/platform.h>
  18. #include <string>
  19. #include "HtmlEntity.h"
  20. #include <kopano/charset/convert.h>
  21. namespace KC {
  22. static const struct HTMLEntity_t {
  23. const WCHAR *s;
  24. WCHAR c;
  25. } _HTMLEntity[] = {
  26. {L"AElig", 198},
  27. {L"Aacute", 193},
  28. {L"Acirc", 194},
  29. {L"Agrave", 192},
  30. {L"Alpha", 913},
  31. {L"Aring", 197},
  32. {L"Atilde", 195},
  33. {L"Auml", 196},
  34. {L"Beta", 914},
  35. {L"Ccedil", 199},
  36. {L"Chi", 935},
  37. {L"Dagger", 8225},
  38. {L"Delta", 916},
  39. {L"ETH", 208},
  40. {L"Eacute", 201},
  41. {L"Ecirc", 202},
  42. {L"Egrave", 200},
  43. {L"Epsilon", 917},
  44. {L"Eta", 919},
  45. {L"Euml", 203},
  46. {L"Gamma", 915},
  47. {L"Iacute", 205},
  48. {L"Icirc", 206},
  49. {L"Igrave", 204},
  50. {L"Iota", 921},
  51. {L"Iuml", 207},
  52. {L"Kappa", 922},
  53. {L"Lambda", 923},
  54. {L"Mu", 924},
  55. {L"Ntilde", 209},
  56. {L"Nu", 925},
  57. {L"OElig", 338},
  58. {L"Oacute", 211},
  59. {L"Ocirc", 212},
  60. {L"Ograve", 210},
  61. {L"Omega", 937},
  62. {L"Omicron", 927},
  63. {L"Oslash", 216},
  64. {L"Otilde", 213},
  65. {L"Ouml", 214},
  66. {L"Phi", 934},
  67. {L"Pi", 928},
  68. {L"Prime", 8243},
  69. {L"Psi", 936},
  70. {L"Rho", 929},
  71. {L"Scaron", 352},
  72. {L"Sigma", 931},
  73. {L"THORN", 222},
  74. {L"Tau", 932},
  75. {L"Theta", 920},
  76. {L"Uacute", 218},
  77. {L"Ucirc", 219},
  78. {L"Ugrave", 217},
  79. {L"Upsilon", 933},
  80. {L"Uuml", 220},
  81. {L"Xi", 926},
  82. {L"Yacute", 221},
  83. {L"Yuml", 376},
  84. {L"Zeta", 918},
  85. {L"aacute", 225},
  86. {L"acirc", 226},
  87. {L"acute", 180},
  88. {L"aelig", 230},
  89. {L"agrave", 224},
  90. {L"alpha", 945},
  91. {L"amp", 38},
  92. {L"and", 8743},
  93. {L"ang", 8736},
  94. {L"aring", 229},
  95. {L"asymp", 8776},
  96. {L"atilde", 227},
  97. {L"auml", 228},
  98. {L"bdquo", 8222},
  99. {L"beta", 946},
  100. {L"brvbar", 166},
  101. {L"bull", 8226},
  102. {L"cap", 8745},
  103. {L"ccedil", 231},
  104. {L"cedil", 184},
  105. {L"cent", 162},
  106. {L"chi", 967},
  107. {L"chi", 967},
  108. {L"circ", 710},
  109. {L"clubs", 9827},
  110. {L"cong", 8773},
  111. {L"copy", 169},
  112. {L"crarr", 8629},
  113. {L"cup", 8746},
  114. {L"curren", 164},
  115. {L"dagger", 8224},
  116. {L"darr", 8595},
  117. {L"deg", 176},
  118. {L"delta", 948},
  119. {L"diams", 9830},
  120. {L"divide", 247},
  121. {L"eacute", 233},
  122. {L"ecirc", 234},
  123. {L"egrave", 232},
  124. {L"empty", 8709},
  125. {L"emsp", 8195},
  126. {L"ensp", 8194},
  127. {L"epsilon", 949},
  128. {L"equiv", 8801},
  129. {L"eta", 951},
  130. {L"eth", 240},
  131. {L"euml", 235},
  132. {L"euro", 8364},
  133. {L"exist", 8707},
  134. {L"fnof", 402},
  135. {L"forall", 8704},
  136. {L"frac12", 189},
  137. {L"frac14", 188},
  138. {L"frac34", 190},
  139. {L"gamma", 947},
  140. {L"ge", 8805},
  141. {L"gt", 62},
  142. {L"harr", 8596},
  143. {L"hearts", 9829},
  144. {L"hellip", 8230},
  145. {L"iacute", 237},
  146. {L"icirc", 238},
  147. {L"iexcl", 161},
  148. {L"igrave", 236},
  149. {L"infin", 8734},
  150. {L"int", 8747},
  151. {L"iota", 953},
  152. {L"iquest", 191},
  153. {L"isin", 8712},
  154. {L"iuml", 239},
  155. {L"kappa", 954},
  156. {L"lambda", 955},
  157. {L"laquo", 171},
  158. {L"larr", 8592},
  159. {L"lceil", 8968},
  160. {L"ldquo", 8220},
  161. {L"le", 8804},
  162. {L"lfloor", 8970},
  163. {L"lowast", 8727},
  164. {L"loz", 9674},
  165. {L"lrm", 8206},
  166. {L"lsaquo", 8249},
  167. {L"lsquo", 8216},
  168. {L"lt", 60},
  169. {L"macr", 175},
  170. {L"mdash", 8212},
  171. {L"micro", 181},
  172. {L"middot", 183},
  173. {L"minus", 8722},
  174. {L"mu", 956},
  175. {L"nabla", 8711},
  176. {L"nbsp", 160},
  177. {L"ndash", 8211},
  178. {L"ne", 8800},
  179. {L"ni", 8715},
  180. {L"not", 172},
  181. {L"notin", 8713},
  182. {L"nsub", 8836},
  183. {L"ntilde", 241},
  184. {L"nu", 957},
  185. {L"oacute", 243},
  186. {L"ocirc", 244},
  187. {L"oelig", 339},
  188. {L"ograve", 242},
  189. {L"oline", 8254},
  190. {L"omega", 969},
  191. {L"omicron", 959},
  192. {L"oplus", 8853},
  193. {L"or", 8744},
  194. {L"ordf", 170},
  195. {L"ordm", 186},
  196. {L"oslash", 248},
  197. {L"otilde", 245},
  198. {L"otimes", 8855},
  199. {L"ouml", 246},
  200. {L"para", 182},
  201. {L"part", 8706},
  202. {L"permil", 8240},
  203. {L"perp", 8869},
  204. {L"phi", 966},
  205. {L"pi", 960},
  206. {L"piv", 982},
  207. {L"plusmn", 177},
  208. {L"pound", 163},
  209. {L"prime", 8242},
  210. {L"prod", 8719},
  211. {L"prop", 8733},
  212. {L"psi", 968},
  213. {L"quot", 34},
  214. {L"radic", 8730},
  215. {L"raquo", 187},
  216. {L"rarr", 8594},
  217. {L"rceil", 8969},
  218. {L"rdquo", 8221},
  219. {L"reg", 174},
  220. {L"rfloor", 8971},
  221. {L"rho", 961},
  222. {L"rlm", 8207},
  223. {L"rsaquo", 8250},
  224. {L"rsquo", 8217},
  225. {L"sbquo", 8218},
  226. {L"scaron", 353},
  227. {L"sdot", 8901},
  228. {L"sect", 167},
  229. {L"shy", 173},
  230. {L"sigma", 963},
  231. {L"sigmaf", 962},
  232. {L"sim", 8764},
  233. {L"spades", 9824},
  234. {L"sub", 8834},
  235. {L"sube", 8838},
  236. {L"sum", 8721},
  237. {L"sup", 8835},
  238. {L"sup1", 185},
  239. {L"sup2", 178},
  240. {L"sup3", 179},
  241. {L"supe", 8839},
  242. {L"szlig", 223},
  243. {L"tau", 964},
  244. {L"there4", 8756},
  245. {L"theta", 952},
  246. {L"thetasym", 977},
  247. {L"thinsp", 8201},
  248. {L"thorn", 254},
  249. {L"tilde", 732},
  250. {L"times", 215},
  251. {L"trade", 8482},
  252. {L"uacute", 250},
  253. {L"uarr", 8593},
  254. {L"ucirc", 251},
  255. {L"ugrave", 249},
  256. {L"uml", 168},
  257. {L"upsih", 978},
  258. {L"upsilon", 965},
  259. {L"uuml", 252},
  260. {L"xi", 958},
  261. {L"yacute", 253},
  262. {L"yen", 165},
  263. {L"yuml", 255},
  264. {L"zeta", 950},
  265. {L"zwj", 8205},
  266. {L"zwnj", 8204}
  267. };
  268. static const size_t cHTMLEntity = ARRAY_SIZE(_HTMLEntity);
  269. static const struct HTMLEntityToName_t {
  270. WCHAR c;
  271. const WCHAR *s;
  272. } _HTMLEntityToName[] = {
  273. {34, L"quot"},
  274. {38, L"amp"},
  275. {60, L"lt"},
  276. {62, L"gt"},
  277. {160, L"nbsp"},
  278. {161, L"iexcl"},
  279. {162, L"cent"},
  280. {163, L"pound"},
  281. {164, L"curren"},
  282. {165, L"yen"},
  283. {166, L"brvbar"},
  284. {167, L"sect"},
  285. {168, L"uml"},
  286. {169, L"copy"},
  287. {170, L"ordf"},
  288. {171, L"laquo"},
  289. {172, L"not"},
  290. {173, L"shy"},
  291. {174, L"reg"},
  292. {175, L"macr"},
  293. {176, L"deg"},
  294. {177, L"plusmn"},
  295. {178, L"sup2"},
  296. {179, L"sup3"},
  297. {180, L"acute"},
  298. {181, L"micro"},
  299. {182, L"para"},
  300. {183, L"middot"},
  301. {184, L"cedil"},
  302. {185, L"sup1"},
  303. {186, L"ordm"},
  304. {187, L"raquo"},
  305. {188, L"frac14"},
  306. {189, L"frac12"},
  307. {190, L"frac34"},
  308. {191, L"iquest"},
  309. {192, L"Agrave"},
  310. {193, L"Aacute"},
  311. {194, L"Acirc"},
  312. {195, L"Atilde"},
  313. {196, L"Auml"},
  314. {197, L"Aring"},
  315. {198, L"AElig"},
  316. {199, L"Ccedil"},
  317. {200, L"Egrave"},
  318. {201, L"Eacute"},
  319. {202, L"Ecirc"},
  320. {203, L"Euml"},
  321. {204, L"Igrave"},
  322. {205, L"Iacute"},
  323. {206, L"Icirc"},
  324. {207, L"Iuml"},
  325. {208, L"ETH"},
  326. {209, L"Ntilde"},
  327. {210, L"Ograve"},
  328. {211, L"Oacute"},
  329. {212, L"Ocirc"},
  330. {213, L"Otilde"},
  331. {214, L"Ouml"},
  332. {215, L"times"},
  333. {216, L"Oslash"},
  334. {217, L"Ugrave"},
  335. {218, L"Uacute"},
  336. {219, L"Ucirc"},
  337. {220, L"Uuml"},
  338. {221, L"Yacute"},
  339. {222, L"THORN"},
  340. {223, L"szlig"},
  341. {224, L"agrave"},
  342. {225, L"aacute"},
  343. {226, L"acirc"},
  344. {227, L"atilde"},
  345. {228, L"auml"},
  346. {229, L"aring"},
  347. {230, L"aelig"},
  348. {231, L"ccedil"},
  349. {232, L"egrave"},
  350. {233, L"eacute"},
  351. {234, L"ecirc"},
  352. {235, L"euml"},
  353. {236, L"igrave"},
  354. {237, L"iacute"},
  355. {238, L"icirc"},
  356. {239, L"iuml"},
  357. {240, L"eth"},
  358. {241, L"ntilde"},
  359. {242, L"ograve"},
  360. {243, L"oacute"},
  361. {244, L"ocirc"},
  362. {245, L"otilde"},
  363. {246, L"ouml"},
  364. {247, L"divide"},
  365. {248, L"oslash"},
  366. {249, L"ugrave"},
  367. {250, L"uacute"},
  368. {251, L"ucirc"},
  369. {252, L"uuml"},
  370. {253, L"yacute"},
  371. {254, L"thorn"},
  372. {255, L"yuml"},
  373. {338, L"OElig"},
  374. {339, L"oelig"},
  375. {352, L"Scaron"},
  376. {353, L"scaron"},
  377. {376, L"Yuml"},
  378. {402, L"fnof"},
  379. {710, L"circ"},
  380. {732, L"tilde"},
  381. {913, L"Alpha"},
  382. {914, L"Beta"},
  383. {915, L"Gamma"},
  384. {916, L"Delta"},
  385. {917, L"Epsilon"},
  386. {918, L"Zeta"},
  387. {919, L"Eta"},
  388. {920, L"Theta"},
  389. {921, L"Iota"},
  390. {922, L"Kappa"},
  391. {923, L"Lambda"},
  392. {924, L"Mu"},
  393. {925, L"Nu"},
  394. {926, L"Xi"},
  395. {927, L"Omicron"},
  396. {928, L"Pi"},
  397. {929, L"Rho"},
  398. {931, L"Sigma"},
  399. {932, L"Tau"},
  400. {933, L"Upsilon"},
  401. {934, L"Phi"},
  402. {935, L"Chi"},
  403. {936, L"Psi"},
  404. {937, L"Omega"},
  405. {945, L"alpha"},
  406. {946, L"beta"},
  407. {947, L"gamma"},
  408. {948, L"delta"},
  409. {949, L"epsilon"},
  410. {950, L"zeta"},
  411. {951, L"eta"},
  412. {952, L"theta"},
  413. {953, L"iota"},
  414. {954, L"kappa"},
  415. {955, L"lambda"},
  416. {956, L"mu"},
  417. {957, L"nu"},
  418. {958, L"xi"},
  419. {959, L"omicron"},
  420. {960, L"pi"},
  421. {961, L"rho"},
  422. {962, L"sigmaf"},
  423. {963, L"sigma"},
  424. {964, L"tau"},
  425. {965, L"upsilon"},
  426. {966, L"phi"},
  427. {967, L"chi"},
  428. {967, L"chi"},
  429. {968, L"psi"},
  430. {969, L"omega"},
  431. {977, L"thetasym"},
  432. {978, L"upsih"},
  433. {982, L"piv"},
  434. {8194, L"ensp"},
  435. {8195, L"emsp"},
  436. {8201, L"thinsp"},
  437. {8204, L"zwnj"},
  438. {8205, L"zwj"},
  439. {8206, L"lrm"},
  440. {8207, L"rlm"},
  441. {8211, L"ndash"},
  442. {8212, L"mdash"},
  443. {8216, L"lsquo"},
  444. {8217, L"rsquo"},
  445. {8218, L"sbquo"},
  446. {8220, L"ldquo"},
  447. {8221, L"rdquo"},
  448. {8222, L"bdquo"},
  449. {8224, L"dagger"},
  450. {8225, L"Dagger"},
  451. {8226, L"bull"},
  452. {8230, L"hellip"},
  453. {8240, L"permil"},
  454. {8242, L"prime"},
  455. {8243, L"Prime"},
  456. {8249, L"lsaquo"},
  457. {8250, L"rsaquo"},
  458. {8254, L"oline"},
  459. {8364, L"euro"},
  460. {8482, L"trade"},
  461. {8592, L"larr"},
  462. {8593, L"uarr"},
  463. {8594, L"rarr"},
  464. {8595, L"darr"},
  465. {8596, L"harr"},
  466. {8629, L"crarr"},
  467. {8704, L"forall"},
  468. {8706, L"part"},
  469. {8707, L"exist"},
  470. {8709, L"empty"},
  471. {8711, L"nabla"},
  472. {8712, L"isin"},
  473. {8713, L"notin"},
  474. {8715, L"ni"},
  475. {8719, L"prod"},
  476. {8721, L"sum"},
  477. {8722, L"minus"},
  478. {8727, L"lowast"},
  479. {8730, L"radic"},
  480. {8733, L"prop"},
  481. {8734, L"infin"},
  482. {8736, L"ang"},
  483. {8743, L"and"},
  484. {8744, L"or"},
  485. {8745, L"cap"},
  486. {8746, L"cup"},
  487. {8747, L"int"},
  488. {8756, L"there4"},
  489. {8764, L"sim"},
  490. {8773, L"cong"},
  491. {8776, L"asymp"},
  492. {8800, L"ne"},
  493. {8801, L"equiv"},
  494. {8804, L"le"},
  495. {8805, L"ge"},
  496. {8834, L"sub"},
  497. {8835, L"sup"},
  498. {8836, L"nsub"},
  499. {8838, L"sube"},
  500. {8839, L"supe"},
  501. {8853, L"oplus"},
  502. {8855, L"otimes"},
  503. {8869, L"perp"},
  504. {8901, L"sdot"},
  505. {8968, L"lceil"},
  506. {8969, L"rceil"},
  507. {8970, L"lfloor"},
  508. {8971, L"rfloor"},
  509. {9674, L"loz"},
  510. {9824, L"spades"},
  511. {9827, L"clubs"},
  512. {9829, L"hearts"},
  513. {9830, L"diams"}
  514. };
  515. static const size_t cHTMLEntityToName = ARRAY_SIZE(_HTMLEntityToName);
  516. static int compareHTMLEntityToChar(const void *m1, const void *m2)
  517. {
  518. auto e1 = static_cast<const HTMLEntity_t *>(m1);
  519. auto e2 = static_cast<const HTMLEntity_t *>(m2);
  520. return wcscmp( e1->s, e2->s );
  521. }
  522. static int compareHTMLEntityToName(const void *m1, const void *m2)
  523. {
  524. auto e1 = static_cast<const HTMLEntityToName_t *>(m1);
  525. auto e2 = static_cast<const HTMLEntityToName_t *>(m2);
  526. return (e1->c < e2->c) ? -1 : (e1->c == e2->c) ? 0 : 1;
  527. }
  528. WCHAR CHtmlEntity::toChar( const WCHAR *name )
  529. {
  530. HTMLEntity_t key = {0};
  531. key.s = name;
  532. HTMLEntity_t *result;
  533. result = (HTMLEntity_t *)bsearch(&key, &_HTMLEntity, cHTMLEntity, sizeof( HTMLEntity_t), (int (*)(const void*, const void*))compareHTMLEntityToChar );
  534. if (result)
  535. return result->c;
  536. else
  537. return 0;
  538. }
  539. const WCHAR *CHtmlEntity::toName( WCHAR c )
  540. {
  541. HTMLEntityToName_t key = {0};
  542. key.c = c;
  543. HTMLEntityToName_t *result;
  544. result = (HTMLEntityToName_t *)bsearch(&key, &_HTMLEntityToName, cHTMLEntityToName, sizeof( HTMLEntityToName_t), (int (*)(const void*, const void*))compareHTMLEntityToName );
  545. if (result)
  546. return result->s;
  547. else
  548. return NULL;
  549. }
  550. /**
  551. * Convert a character to HTML entity. when no entity is needed, false
  552. * is returned. Output parameter will always contain correct
  553. * representation of input.
  554. *
  555. * @param[in] c wide character to convert into HTML entity
  556. * @param[out] strHTML HTML version of input
  557. *
  558. * @return false if no conversion took place, true if it did.
  559. */
  560. bool CHtmlEntity::CharToHtmlEntity(WCHAR c, std::wstring &strHTML)
  561. {
  562. bool bHTML = false;
  563. switch(c) {
  564. case '\r':
  565. bHTML = true; // but no output
  566. break;
  567. case '\n':
  568. strHTML = L"<br>\n";
  569. bHTML = true;
  570. break;
  571. case '\t':
  572. strHTML = L"&nbsp;&nbsp;&nbsp; ";
  573. bHTML = true;
  574. break;
  575. case ' ':
  576. strHTML = L"&nbsp;";
  577. bHTML = true;
  578. break;
  579. default:
  580. const WCHAR *lpChar = CHtmlEntity::toName(c);
  581. if (lpChar == nullptr)
  582. break;
  583. strHTML = std::wstring(L"&") + lpChar + L";";
  584. bHTML = true;
  585. break;
  586. }
  587. if (!bHTML)
  588. strHTML = c;
  589. return bHTML;
  590. }
  591. /**
  592. * Validate HTML entity
  593. *
  594. * Valid:
  595. * &{#100 | #x64 | amp};test
  596. *
  597. * @param[in] strEntity a string part to test if this is a HTML entity, which could be a single wide character
  598. *
  599. * @return true if input is HTML, false if it is a normal string
  600. */
  601. bool CHtmlEntity::validateHtmlEntity(const std::wstring &strEntity)
  602. {
  603. if(strEntity.size() < 3 || strEntity[0] != '&')
  604. return false;
  605. size_t pos = strEntity.find(';');
  606. if (pos == std::wstring::npos || pos < 3)
  607. return false;
  608. std::wstring str;
  609. if (strEntity[1] == '#') {
  610. int base = 10;
  611. str = strEntity.substr(2, pos-2);
  612. if(str[0] == 'x')
  613. base = 16;
  614. return wcstoul(str.c_str() + 1, NULL, base) != 0;
  615. }
  616. str = strEntity.substr(1, pos - 2);
  617. return CHtmlEntity::toChar(str.c_str()) > 0;
  618. }
  619. /**
  620. * Convert HTML entity to a single wide character.
  621. *
  622. * @param[in] strEntity valid HTML entity to convert
  623. *
  624. * @return wide character for entity, or ? if conversion failed.
  625. */
  626. WCHAR CHtmlEntity::HtmlEntityToChar(const std::wstring &strEntity)
  627. {
  628. unsigned int ulCode;
  629. if (strEntity[0] != '#') {
  630. ulCode = toChar(strEntity.c_str());
  631. if (ulCode > 0)
  632. return (WCHAR)ulCode;
  633. return '?';
  634. }
  635. // We have a unicode number, use iconv to get the WCHAR
  636. std::string strUnicode;
  637. int base = 10;
  638. auto pNum = strEntity.c_str() + 1;
  639. if (strEntity.size() > 2 && strEntity[1] == 'x') {
  640. base = 16;
  641. ++pNum;
  642. }
  643. ulCode = wcstoul(pNum, NULL, base);
  644. if (ulCode <= 0xFFFF /*USHRT_MAX*/)
  645. return (WCHAR)ulCode;
  646. strUnicode.append(1, (ulCode & 0xff));
  647. strUnicode.append(1, (ulCode >> 8) & 0xff);
  648. strUnicode.append(1, (ulCode >> 16) & 0xff);
  649. strUnicode.append(1, (ulCode >> 24) & 0xff);
  650. try {
  651. return convert_to<std::wstring>(CHARSET_WCHAR, strUnicode, 4, "UCS-4LE")[0];
  652. } catch (const illegal_sequence_exception &) {
  653. // iconv doesn't seem to like certain sequences. one of them is 0x92000000 (LE).
  654. return L'?';
  655. }
  656. return '?';
  657. }
  658. } /* namespace */