123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- #include <kopano/platform.h>
- #include <kopano/charset/convert.h>
- #include <mapicode.h>
- #include <numeric>
- #include <vector>
- #include <stdexcept>
- #include <string>
- #include <kopano/stringutil.h>
- #include <cerrno>
- #define BUFSIZE 4096
- namespace KC {
- convert_exception::convert_exception(enum exception_type type, const std::string &message)
- : std::runtime_error(message)
- , m_type(type)
- {}
- unknown_charset_exception::unknown_charset_exception(const std::string &message)
- : convert_exception(eUnknownCharset, message)
- {}
- illegal_sequence_exception::illegal_sequence_exception(const std::string &message)
- : convert_exception(eIllegalSequence, message)
- {}
- namespace details {
- HRESULT HrFromException(const convert_exception &ce)
- {
- switch (ce.type()) {
- case convert_exception::eUnknownCharset: return MAPI_E_NOT_FOUND;
- case convert_exception::eIllegalSequence: return MAPI_E_INVALID_PARAMETER;
- default: return MAPI_E_CALL_FAILED;
- }
- }
-
-
-
/**
 * Thin wrapper around a `const char **` that converts implicitly to either
 * `const char **` or `char **`.
 *
 * It is used to pass the input-buffer pointer to iconv() in this file;
 * presumably this papers over iconv() prototypes that differ in the
 * constness of that argument (TODO confirm against supported platforms).
 */
class ICONV_HACK {
public:
	/** Wrap the given pointer-to-pointer; no ownership is taken. */
	ICONV_HACK(const char **ptr) :
		m_wrapped(ptr)
	{
	}

	/** Yield the wrapped pointer unchanged. */
	operator const char **() const
	{
		return m_wrapped;
	}

	/** Yield the wrapped pointer with the inner const stripped. */
	operator char **()
	{
		char **stripped = const_cast<char **>(m_wrapped);
		return stripped;
	}

private:
	const char **m_wrapped;
};
-
-
-
- iconv_context_base::iconv_context_base(const char* tocode, const char* fromcode)
- {
-
- m_bForce = true;
- m_bHTML = false;
-
- std::string strto = tocode;
- size_t pos = strto.find("//");
- if(pos != std::string::npos) {
- std::string options = strto.substr(pos+2);
- strto = strto.substr(0,pos);
- std::vector<std::string> vOptions = tokenize(options, ",");
- std::vector<std::string> vOptionsFiltered;
- std::vector<std::string>::const_iterator i;
- i = vOptions.begin();
- while(i != vOptions.end()) {
- if (*i == "IGNORE" || *i == "FORCE")
- m_bForce = true;
- else if (*i == "NOIGNORE" || *i == "NOFORCE")
- m_bForce = false;
- else if (*i == "HTMLENTITIES" && strcasecmp(fromcode, CHARSET_WCHAR) == 0)
- m_bHTML = true;
- else
- vOptionsFiltered.push_back(*i);
- ++i;
- }
- if(!vOptionsFiltered.empty()) {
- strto += "//";
- strto += join(vOptionsFiltered.begin(), vOptionsFiltered.end(), std::string(","));
- }
- }
- m_cd = iconv_open(strto.c_str(), fromcode);
- if (m_cd == (iconv_t)(-1))
- throw unknown_charset_exception(strerror(errno));
- }
- iconv_context_base::~iconv_context_base()
- {
- if (m_cd != (iconv_t)(-1))
- iconv_close(m_cd);
- }
- void iconv_context_base::doconvert(const char *lpFrom, size_t cbFrom)
- {
- char buf[BUFSIZE];
- const char *lpSrc = NULL;
- char *lpDst = NULL;
- size_t cbSrc = 0;
- size_t cbDst = 0;
- size_t err;
-
- lpSrc = lpFrom;
- cbSrc = cbFrom;
-
- while(cbSrc) {
- lpDst = buf;
- cbDst = sizeof(buf);
- err = iconv(m_cd, ICONV_HACK(&lpSrc), &cbSrc, &lpDst, &cbDst);
-
- if (err != static_cast<size_t>(-1) || cbDst != sizeof(buf)) {
-
- append(buf, sizeof(buf) - cbDst);
- continue;
- }
- if (m_bHTML) {
- if(cbSrc < sizeof(wchar_t)) {
-
- ++lpSrc;
- --cbSrc;
- continue;
- }
-
- std::wstring wstrEntity = L"&#";
- size_t cbEntity;
- wchar_t code;
- const char *lpEntity;
- memcpy(&code, lpSrc, sizeof(code));
- wstrEntity += std::to_wstring(code);
- wstrEntity += L";";
- cbEntity = wstrEntity.size() * sizeof(wchar_t);
- lpEntity = (const char *)wstrEntity.c_str();
-
-
- err = iconv(m_cd, ICONV_HACK(&lpEntity), &cbEntity, &lpDst, &cbDst);
- if (err == static_cast<size_t>(-1))
- assert(false);
- lpSrc += sizeof(wchar_t);
- cbSrc -= sizeof(wchar_t);
- } else if (m_bForce) {
-
- if (cbSrc) {
- ++lpSrc;
- --cbSrc;
- }
- } else {
- throw illegal_sequence_exception(strerror(errno));
- }
-
- append(buf, sizeof(buf) - cbDst);
- }
-
- lpDst = buf;
- cbDst = sizeof(buf);
- err = iconv(m_cd, NULL, NULL, &lpDst, &cbDst);
- append(buf, sizeof(buf) - cbDst);
- }
-
- }
- convert_context::~convert_context()
- {
- for (auto &ictx : m_contexts)
- delete ictx.second;
- for (auto &icode : m_codes)
- delete[] icode;
- }
- void convert_context::persist_code(context_key &key, unsigned flags)
- {
- if (flags & pfToCode) {
- code_set::const_iterator iCode = m_codes.find(key.tocode);
- if (iCode == m_codes.cend()) {
- auto tocode = new char[strlen(key.tocode)+1];
- memcpy(tocode, key.tocode, strlen(key.tocode) + 1);
- iCode = m_codes.insert(tocode).first;
- }
- key.tocode = *iCode;
- }
- if (flags & pfFromCode) {
- code_set::const_iterator iCode = m_codes.find(key.fromcode);
- if (iCode == m_codes.cend()) {
- auto fromcode = new char[strlen(key.fromcode)+1];
- memcpy(fromcode, key.fromcode, strlen(key.fromcode) + 1);
- iCode = m_codes.insert(fromcode).first;
- }
- key.fromcode = *iCode;
- }
- }
- char* convert_context::persist_string(const std::string &strValue)
- {
- m_lstStrings.push_back(strValue);
- return const_cast<char*>(m_lstStrings.back().c_str());
- }
- wchar_t* convert_context::persist_string(const std::wstring &wstrValue)
- {
- m_lstWstrings.push_back(wstrValue);
- return const_cast<wchar_t*>(m_lstWstrings.back().c_str());
- }
- }
|