codepage.cpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. /*
  2. * Copyright 2005 - 2016 Zarafa and its licensors
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. *
  16. */
  17. #include <kopano/platform.h>
  18. #include <mapidefs.h>
  19. #include <mapicode.h>
  20. #include <kopano/codepage.h>
  21. namespace KC {
  22. // These charset should all be supported by iconv
  23. // @see http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx
  24. // this list is incomplete
  25. static const struct CPMAP {
  26. const char *charset;
  27. ULONG codepage;
  28. } CPMAP[] = {
  29. { "DIN_66003", 20106 },
  30. { "NS_4551-1", 20108 },
  31. { "SEN_850200_B", 20107 },
  32. { "big5", 950 },
  33. { "csISO2022JP", 50221 },
  34. { "euc-jp", 51932 },
  35. { "euc-cn", 51936 },
  36. { "euc-kr", 51949 },
  37. { "euc-kr", 949 }, // euc-kr is compatible with cp949 according to some sources (some horde tickets say this)
  38. { "cp949", 949 },
  39. { "ks_c_5601-1987", 949 }, // ks_c_5601-1987 == cp949, but this charset is not recognized by iconv
  40. { "gb18030", 936 }, // was gb2312, but cp936 is gb3212 + more, which is superseded by gb18030 (is codepage 54936?)
  41. { "gb2312", 936 }, // entry for reverse lookup
  42. { "GBK", 936 }, // entry for reverse lookup
  43. { "csgb2312", 52936 }, // not sure, hz-cn-2312 according to MS, iconv has this one
  44. { "ibm852", 852 },
  45. { "ibm866", 866 },
  46. { "iso-2022-jp", 50220 },
  47. { "iso-2022-jp", 50222 },
  48. { "iso-2022-kr", 50225 },
  49. { "windows-1252", 1252 },
  50. { "iso-8859-1", 28591 },
  51. { "iso-8859-2", 28592 },
  52. { "iso-8859-3", 28593 },
  53. { "iso-8859-4", 28594 },
  54. { "iso-8859-5", 28595 },
  55. { "iso-8859-6", 28596 },
  56. { "iso-8859-7", 28597 },
  57. { "iso-8859-8", 28598 },
  58. { "iso-8859-8-i", 28598 },
  59. { "iso-8859-9", 28599 },
  60. { "iso-8859-13", 28603 },
  61. { "iso-8859-15", 28605 },
  62. { "koi8-r", 20866 },
  63. { "koi8-u", 21866 },
  64. { "shift-jis", 932 },
  65. { "shift_jis", 932 },
  66. { "unicode", 1200 }, /* UTF-16LE and BMP-only */
  67. { "unicodebig", 1201 }, /* UTF-16BE and BMP-only */
  68. { "utf-7", 65000 },
  69. { "utf-8", 65001 },
  70. { "windows-1250", 1250 },
  71. { "windows-1251", 1251 },
  72. { "windows-1253", 1253 },
  73. { "windows-1254", 1254 },
  74. { "windows-1255", 1255 },
  75. { "windows-1256", 1256 },
  76. { "windows-1257", 1257 },
  77. { "windows-1258", 1258 },
  78. { "windows-874", 874 },
  79. { "us-ascii", 20127 }
  80. };
  81. /**
  82. * Converts a Windows codepage to a valid iconv charset string.
  83. *
  84. * @param[in] codepage Windows codepage number (eg. from PR_INTERNET_CPID)
  85. * @param[out] lppszCharset Pointer to internal structure containing iconv charset string
  86. * @retval MAPI_E_NOT_FOUND on unknown codepage, lppszCharset will be unchanged.
  87. */
  88. HRESULT HrGetCharsetByCP(ULONG codepage, const char **lppszCharset)
  89. {
  90. for (size_t i = 0; i < ARRAY_SIZE(CPMAP); ++i) {
  91. if(CPMAP[i].codepage == codepage) {
  92. *lppszCharset = CPMAP[i].charset;
  93. return hrSuccess;
  94. }
  95. }
  96. return MAPI_E_NOT_FOUND;
  97. }
  98. /**
  99. * Converts a Windows codepage to a valid iconv charset string.
  100. *
  101. * @param[in] codepage Windows codepage number (eg. from PR_INTERNET_CPID)
  102. * @param[out] lppszCharset Pointer to internal structure containing iconv charset string
  103. * @retval MAPI_E_NOT_FOUND on unknown codepage, lppszCharset will be unchanged.
  104. */
  105. HRESULT HrGetCPByCharset(const char *lpszCharset,ULONG *codepage)
  106. {
  107. for (size_t i = 0; i < ARRAY_SIZE(CPMAP); ++i) {
  108. if(strcasecmp(CPMAP[i].charset, lpszCharset) == 0) {
  109. *codepage = CPMAP[i].codepage;
  110. return hrSuccess;
  111. }
  112. }
  113. return MAPI_E_NOT_FOUND;
  114. }
  115. } /* namespace */