stringutil.cpp 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709
  1. /*
  2. * Copyright 2005 - 2016 Zarafa and its licensors
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. *
  16. */
  17. #include <kopano/platform.h>
  18. #include <algorithm>
  19. #include <sstream>
  20. #include <stdexcept>
  21. #include <string>
  22. #include <cctype>
  23. #include <kopano/stringutil.h>
  24. #include <kopano/charset/convert.h>
  25. #include <kopano/ECIConv.h>
  26. #include <openssl/md5.h>
  27. namespace KC {
  /**
   * Convert a 32-bit integer to its textual representation.
   *
   * @param[in] x       value to format
   * @param[in] usehex  when true, format as "0x" followed by eight uppercase
   *                    hexadecimal digits; takes precedence over @a _signed
   * @param[in] _signed when true (and @a usehex is false), print @a x
   *                    reinterpreted as a signed integer
   *
   * @return the formatted number
   */
  std::string stringify(unsigned int x, bool usehex, bool _signed) {
      char szBuff[33];
      if (usehex)
          snprintf(szBuff, sizeof(szBuff), "0x%08X", x);
      else if (_signed)
          /* %d expects an int argument, so convert explicitly instead of passing the unsigned value. */
          snprintf(szBuff, sizeof(szBuff), "%d", static_cast<int>(x));
      else
          snprintf(szBuff, sizeof(szBuff), "%u", x);
      return szBuff;
  }
  /**
   * Convert a 64-bit signed integer to a string.
   *
   * @param[in] x      value to format
   * @param[in] usehex when true, the stream is switched to uppercase
   *                   hexadecimal with a base prefix; otherwise decimal
   *
   * @return the formatted number
   */
  std::string stringify_int64(int64_t x, bool usehex) {
      std::ostringstream out;
      if (usehex)
          /* Replace all format flags at once: base prefix, hex base, uppercase digits. */
          out.flags(std::ios::showbase | std::ios::hex | std::ios::uppercase);
      out << x;
      return out.str();
  }
  /**
   * Convert a float to a string using the default stream formatting.
   *
   * @param[in] x value to format
   *
   * @return the formatted number
   */
  std::string stringify_float(float x) {
      std::ostringstream formatted;
      formatted << x;
      return formatted.str();
  }
  /**
   * Convert a double to a fixed-notation string.
   *
   * @param[in] x       value to format
   * @param[in] prec    number of digits after the decimal point
   * @param[in] bLocale when true, try to format using the user's environment
   *                    locale; if that locale cannot be constructed the
   *                    stream's default locale is kept
   *
   * @return the formatted number
   */
  std::string stringify_double(double x, int prec, bool bLocale) {
      std::ostringstream out;
      out.setf(std::ios::fixed, std::ios::floatfield);
      out.precision(prec);
      if (bLocale) {
          try {
              out.imbue(std::locale(""));
          } catch (std::runtime_error &) {
              // locale not available, keep the stream's default locale
          }
      }
      out << x;
      return out.str();
  }
  /**
   * Convert a 32-bit integer to a wide string.
   *
   * @param[in] x       value to format
   * @param[in] usehex  when true, format as uppercase hexadecimal with a base
   *                    prefix; takes precedence over @a _signed
   * @param[in] _signed when true (and @a usehex is false), print @a x
   *                    reinterpreted as a signed integer, matching stringify()
   *
   * @return the formatted number
   */
  std::wstring wstringify(unsigned int x, bool usehex, bool _signed)
  {
      std::wostringstream s;
      if (usehex) {
          s.flags(std::ios::showbase);
          s.setf(std::ios::hex, std::ios::basefield); // showbase && basefield: add 0x prefix
          s.setf(std::ios::uppercase);
          s << x;
      } else if (_signed) {
          /* Previously the flag was ignored and the value was always printed unsigned. */
          s << static_cast<int>(x);
      } else {
          s << x;
      }
      return s.str();
  }
  /**
   * Parse a hexadecimal number from a C string.
   *
   * @param[in] lpszHex text to parse with the scanf "%X" conversion; may be NULL
   *
   * @return the parsed value, or 0 when the input is NULL or no hexadecimal
   *         number could be read
   */
  unsigned int xtoi(const char *lpszHex)
  {
      unsigned int ulHex = 0;
      if (lpszHex == nullptr)
          return 0;
      if (sscanf(lpszHex, "%X", &ulHex) != 1)
          return 0;
      return ulHex;
  }
  /**
   * Search a memory buffer for a byte sequence.
   *
   * @param[in] haystack     buffer to search in
   * @param[in] haystackSize number of bytes in @a haystack
   * @param[in] needle       byte sequence to look for
   * @param[in] needleSize   number of bytes in @a needle
   *
   * @return 0 when @a needle occurs in @a haystack, 1 when it does not, and a
   *         negative value when @a haystack is shorter than @a needle.
   *         An empty needle is reported as not found (1).
   */
  int memsubstr(const void* haystack, size_t haystackSize, const void* needle, size_t needleSize)
  {
      if (haystackSize < needleSize) {
          /*
           * Keep reporting the (negative) size difference, but never let the
           * narrowing to int turn it into 0 ("found") or a positive value.
           */
          const int diff = static_cast<int>(haystackSize - needleSize);
          return diff < 0 ? diff : -1;
      }
      /* Nothing to compare; also avoids dereferencing an empty needle buffer. */
      if (needleSize == 0)
          return 1;
      const auto databuf = static_cast<const unsigned char *>(haystack);
      const auto searchbuf = static_cast<const unsigned char *>(needle);
      const auto dataend = databuf + haystackSize;
      return std::search(databuf, dataend, searchbuf, searchbuf + needleSize) == dataend ? 1 : 0;
  }
  113. std::string str_storage(uint64_t ulBytes, bool bUnlimited) {
  114. static double MB = 1024.0 * 1024.0;
  115. if (ulBytes == 0 && bUnlimited)
  116. return "unlimited";
  117. return stringify_double((double)ulBytes / MB, 2) + " MB";
  118. }
  /**
   * Extract the server part from a connection path.
   *
   * Everything up to and including the first "://" is dropped, then the result
   * is cut off at the first ':' that follows.
   *
   * @param[in] szPath connection path
   *
   * @return the remaining text between the scheme separator and the first ':'
   */
  std::string GetServerNameFromPath(const char *szPath) {
      std::string server = szPath;
      const size_t scheme = server.find("://");
      if (scheme != std::string::npos)
          /* Remove prefixed type information */
          server = server.substr(scheme + 3);
      const size_t colon = server.find(':');
      if (colon == std::string::npos)
          return server;
      return server.substr(0, colon);
  }
  /**
   * Extract the port from an http(s) connection path.
   *
   * Only paths starting with "http" are considered. The port is looked up in
   * the part between the "://" separator and the first following '/', so the
   * scheme colon and any path components are never mistaken for a port.
   *
   * @param[in] szPath connection path, e.g. "https://server:237/kopano"; may be NULL
   *
   * @return the text after the last ':' of the host part, or an empty string
   *         when the path is NULL, is not an http path, or has no port
   */
  std::string GetServerPortFromPath(const char *szPath) {
      if (szPath == nullptr)
          return std::string();
      const std::string path = szPath;
      if (path.compare(0, 4, "http") != 0)
          return std::string();

      /* Skip the scheme so that its ':' is not taken for the port separator. */
      size_t hostStart = path.find("://");
      hostStart = (hostStart == std::string::npos) ? 0 : hostStart + 3;

      /* Strip additional path: the host part ends at the first '/'. */
      const size_t hostEnd = path.find('/', hostStart);
      const std::string authority = (hostEnd == std::string::npos) ?
          path.substr(hostStart) : path.substr(hostStart, hostEnd - hostStart);

      const size_t colon = authority.rfind(':');
      if (colon == std::string::npos)
          return std::string();
      /* A ':' inside a bracketed address literal is not a port separator. */
      const size_t bracket = authority.rfind(']');
      if (bracket != std::string::npos && colon < bracket)
          return std::string();
      return authority.substr(colon + 1);
  }
  /**
   * Escape single quotes so the text can be placed inside a single-quoted
   * shell argument.
   *
   * @param[in] str text to escape
   *
   * @return @a str with every ' replaced by the sequence '\''
   */
  std::string shell_escape(const std::string &str)
  {
      std::string result;
      for (const char ch : str) {
          if (ch == '\'')
              result += "'\\''"; // shell escape sequence
          else
              result += ch;
      }
      return result;
  }
  165. std::string shell_escape(const std::wstring &wstr)
  166. {
  167. std::string strLocale = convert_to<std::string>(wstr);
  168. return shell_escape(strLocale);
  169. }
  170. std::vector<std::wstring> tokenize(const std::wstring &strInput, const WCHAR sep, bool bFilterEmpty) {
  171. const WCHAR *begin, *end = NULL;
  172. std::vector<std::wstring> vct;
  173. begin = strInput.c_str();
  174. while (*begin != '\0') {
  175. end = wcschr(begin, sep);
  176. if (!end) {
  177. vct.push_back(begin);
  178. break;
  179. }
  180. if (!bFilterEmpty || std::distance(begin,end) > 0)
  181. vct.push_back(std::wstring(begin,end));
  182. begin = end+1;
  183. }
  184. return vct;
  185. }
  /**
   * Split a string on a separator character.
   *
   * Pieces are located with strchr, so a piece search stops at a NUL
   * character. An empty piece after a trailing separator is never produced.
   *
   * @param[in] strInput     text to split
   * @param[in] sep          separator character
   * @param[in] bFilterEmpty when true, empty pieces are left out
   *
   * @return the pieces in order of appearance
   */
  std::vector<std::string> tokenize(const std::string &strInput, const char sep, bool bFilterEmpty) {
      std::vector<std::string> tokens;
      const char *cursor = strInput.c_str();
      const char *const stop = cursor + strInput.length();
      while (cursor < stop) {
          const char *hit = strchr(cursor, sep);
          if (hit == nullptr) {
              // no further separator: the remainder is the last piece
              tokens.push_back(cursor);
              break;
          }
          if (hit != cursor || !bFilterEmpty)
              tokens.push_back(std::string(cursor, hit));
          cursor = hit + 1;
      }
      return tokens;
  }
  /**
   * Remove leading and trailing characters from a string.
   *
   * @param[in] strInput text to trim
   * @param[in] strTrim  set of characters to strip from both ends
   *
   * @return the trimmed copy; empty when @a strInput consists only of
   *         characters from @a strTrim
   */
  std::string trim(const std::string &strInput, const std::string &strTrim)
  {
      const size_t first = strInput.find_first_not_of(strTrim);
      if (first == std::string::npos)
          return std::string();
      const size_t last = strInput.find_last_not_of(strTrim);
      return strInput.substr(first, last - first + 1);
  }
  /**
   * Convert one hexadecimal digit to its numeric value.
   *
   * The input is not validated: characters '0'..'9' map to 0..9, anything
   * from 'a' upwards is treated as a lowercase digit and everything else as
   * an uppercase digit. Expects sensible input.
   *
   * @param[in] c hexadecimal digit
   *
   * @return value of the digit
   */
  unsigned char x2b(char c)
  {
      int base;
      if (c >= '0' && c <= '9')
          base = '0';
      else if (c >= 'a')
          base = 'a' - 10;
      else
          base = 'A' - 10;
      return c - base;
  }
  225. std::string hex2bin(const std::string &input)
  226. {
  227. std::string buffer;
  228. if (input.length() % 2 != 0)
  229. return buffer;
  230. buffer.reserve(input.length() / 2);
  231. for (unsigned int i = 0; i < input.length(); ) {
  232. unsigned char c;
  233. c = x2b(input[i++]) << 4;
  234. c |= x2b(input[i++]);
  235. buffer += c;
  236. }
  237. return buffer;
  238. }
  239. std::string hex2bin(const std::wstring &input)
  240. {
  241. std::string buffer;
  242. if (input.length() % 2 != 0)
  243. return buffer;
  244. buffer.reserve(input.length() / 2);
  245. for (unsigned int i = 0; i < input.length(); ) {
  246. unsigned char c;
  247. c = x2b((char)input[i++]) << 4;
  248. c |= x2b((char)input[i++]);
  249. buffer += c;
  250. }
  251. return buffer;
  252. }
  /**
   * Encode raw bytes as uppercase hexadecimal text.
   *
   * @param[in] inLength number of bytes in @a input
   * @param[in] input    bytes to encode; may be NULL
   *
   * @return two hexadecimal digits per input byte; empty when @a input is NULL
   */
  std::string bin2hex(unsigned int inLength, const unsigned char *input)
  {
      static const char digits[] = "0123456789ABCDEF";
      std::string hex;
      if (input == nullptr)
          return hex;
      hex.reserve(inLength * 2);
      for (const unsigned char *p = input, *stop = input + inLength; p != stop; ++p) {
          hex.push_back(digits[*p >> 4]);
          hex.push_back(digits[*p & 0x0F]);
      }
      return hex;
  }
  266. std::string bin2hex(const std::string &input)
  267. {
  268. return bin2hex((unsigned int)input.size(), (const unsigned char*)input.c_str());
  269. }
  /**
   * Encode raw bytes as uppercase hexadecimal wide text.
   *
   * @param[in] inLength number of bytes in @a input
   * @param[in] input    bytes to encode; may be NULL
   *
   * @return two hexadecimal digits per input byte; empty when @a input is NULL
   */
  std::wstring bin2hexw(unsigned int inLength, const unsigned char *input)
  {
      static const wchar_t digits[] = L"0123456789ABCDEF";
      std::wstring hex;
      if (input == nullptr)
          return hex;
      hex.reserve(inLength * 2);
      for (const unsigned char *p = input, *stop = input + inLength; p != stop; ++p) {
          hex.push_back(digits[*p >> 4]);
          hex.push_back(digits[*p & 0x0F]);
      }
      return hex;
  }
  283. std::wstring bin2hexw(const std::string &input)
  284. {
  285. return bin2hexw((unsigned int)input.size(), (const unsigned char*)input.c_str());
  286. }
  /**
   * Encodes a string for inclusion into an url.
   *
   * Bytes with a value of 33 or lower, bytes with a value of 128 or higher and
   * the characters : / ? # [ ] @ ! $ & ' ( ) * + , ; = are written as %XX with
   * uppercase hexadecimal digits. All other bytes are copied unchanged.
   *
   * @note this does not encode an url to another more valid url (since / would get encoded!)
   * @note watch the locale of the string, make sure it's the same as the rest of the url.
   *
   * @param[in] input string to encode
   *
   * @return encoded string valid to include in an url
   */
  std::string urlEncode(const std::string &input)
  {
      static const char digits[] = "0123456789ABCDEF";
      static const char reserved[] = ":/?#[]@!$&'()*+,;=";
      std::string output;
      output.reserve(input.length());
      for (const char ch : input) {
          /*
           * Work on the unsigned byte value: with a signed char, shifting a
           * byte >= 0x80 yields a negative index into the digit table.
           */
          const auto byte = static_cast<unsigned char>(ch);
          /* The range check runs first, so strchr never sees a NUL byte. */
          const bool escape = byte <= 33 || byte >= 128 || strchr(reserved, ch) != nullptr;
          if (!escape) {
              output += ch;
              continue;
          }
          output += '%';
          output += digits[byte >> 4];
          output += digits[byte & 0x0F];
      }
      return output;
  }
  339. /**
  340. * encode an url part, input in wide char, and destination charset in encoded characters
  341. *
  342. * @param[in] input wide string to convert to valid url encoded ascii string
  343. * @param[in] charset non-ascii characters will be encoded for this charset
  344. *
  345. * @return url valid encoded string
  346. */
  347. std::string urlEncode(const std::wstring &input, const char* charset)
  348. {
  349. return urlEncode(convert_to<std::string>(charset, input, rawsize(input), CHARSET_WCHAR));
  350. }
  351. std::string urlEncode(const WCHAR* input, const char* charset)
  352. {
  353. return urlEncode(convert_to<std::string>(charset, input, rawsize(input), CHARSET_WCHAR));
  354. }
  /**
   * Replaces %## values by the byte they encode,
   * i.e. Amsterdam%2C -> Amsterdam,
   *
   * A '%' that is not followed by two hexadecimal digits is copied to the
   * output unchanged instead of being decoded into an arbitrary byte.
   *
   * @note 1. this can take a full url, since it just replaces the %##
   * @note 2. you need to handle the locale of the string yourself!
   *
   * @param[in] input url encoded string
   *
   * @return decoded url in the locale it was encoded in
   */
  std::string urlDecode(const std::string &input)
  {
      /* Value of a hexadecimal digit, or -1 when the character is not one. */
      const auto nibble = [](char c) -> int {
          if (c >= '0' && c <= '9')
              return c - '0';
          if (c >= 'a' && c <= 'f')
              return c - 'a' + 10;
          if (c >= 'A' && c <= 'F')
              return c - 'A' + 10;
          return -1;
      };
      std::string output;
      output.reserve(input.length());
      for (size_t i = 0; i < input.length(); ++i) {
          if (input[i] == '%' && input.length() - i > 2) {
              const int hi = nibble(input[i + 1]);
              const int lo = nibble(input[i + 2]);
              if (hi >= 0 && lo >= 0) {
                  output += static_cast<char>((hi << 4) | lo);
                  i += 2; // skip the two digits just consumed
                  continue;
              }
          }
          output += input[i];
      }
      return output;
  }
  /**
   * Convert the line endings of a memory buffer to DOS \r\n enters.
   *
   * A lone \n, a lone \r and an existing \r\n pair each produce exactly one
   * \r\n in the output. A terminating NUL is written after the converted data.
   *
   * @param[in] size length of the input
   * @param[in] input buffer containing strings with enters to convert
   * @param[out] output buffer with enough space to hold the converted data plus the terminating NUL
   * @param[out] outsize number of characters written to output, not counting the terminating NUL
   */
  void BufferLFtoCRLF(size_t size, const char *input, char *output, size_t *outsize) {
      char *dst = output;
      for (size_t pos = 0; pos < size; ++pos) {
          const char ch = input[pos];
          if (ch != '\r' && ch != '\n') {
              *dst++ = ch;
              continue;
          }
          // an existing \r\n pair is consumed as a whole
          if (ch == '\r' && pos + 1 < size && input[pos + 1] == '\n')
              ++pos;
          *dst++ = '\r';
          *dst++ = '\n';
      }
      *dst = '\0';
      *outsize = static_cast<size_t>(dst - output);
  }
  /**
   * Converts tabs in a string to spaces; every tab becomes four spaces.
   *
   * @param[in] strInput input string to be converted
   * @param[out] lpstrOutput receives the converted string
   */
  void StringTabtoSpaces(const std::wstring &strInput, std::wstring *lpstrOutput) {
      std::wstring expanded;
      expanded.reserve(strInput.length());
      for (const wchar_t ch : strInput) {
          if (ch == L'\t')
              expanded += L"    ";
          else
              expanded += ch;
      }
      lpstrOutput->swap(expanded);
  }
  /**
   * Converts CRLF in a string to LF.
   *
   * Only a \r that is directly followed by \n is dropped; any other \r is kept.
   *
   * @param[in] strInput input string to be converted
   * @param[out] lpstrOutput receives the converted string
   */
  void StringCRLFtoLF(const std::wstring &strInput, std::wstring *lpstrOutput) {
      const size_t len = strInput.length();
      std::wstring stripped;
      stripped.reserve(len);
      for (size_t pos = 0; pos < len; ++pos) {
          const bool crBeforeLf = strInput[pos] == L'\r' &&
              pos + 1 < len && strInput[pos + 1] == L'\n';
          if (!crBeforeLf)
              stripped.push_back(strInput[pos]);
      }
      lpstrOutput->swap(stripped);
  }
  /**
   * Converts a string inline from \n enters to \r\n.
   *
   * Every \n that is not already preceded by \r gets a \r inserted in front
   * of it, including a \n at the very start of the string.
   *
   * @param strInOut string to edit
   */
  void StringLFtoCRLF(std::string &strInOut)
  {
      std::string converted;
      converted.reserve(strInOut.size());
      /* Starts as a non-\r value so that a leading \n is converted as well. */
      char prev = '\0';
      for (const char ch : strInOut) {
          if (ch == '\n' && prev != '\r')
              converted += "\r\n";
          else
              converted += ch;
          prev = ch;
      }
      strInOut.swap(converted);
  }
  /**
   * Build a std::string from a printf-style format and arguments.
   *
   * @param[in] fmt printf-style format string, followed by its arguments
   *
   * @return the formatted text, or an empty string when formatting fails
   */
  std::string format(const char *const fmt, ...) {
      va_list ap;
      va_list ap_copy;
      va_start(ap, fmt);
      /* The argument list is consumed by each pass, so keep a copy for the second one. */
      va_copy(ap_copy, ap);
      /* First pass only measures the required length. */
      const int len = vsnprintf(nullptr, 0, fmt, ap);
      va_end(ap);

      std::string result;
      if (len > 0) {
          /* One extra byte for the terminating NUL that vsnprintf writes. */
          result.resize(static_cast<size_t>(len) + 1);
          vsnprintf(&result[0], result.size(), fmt, ap_copy);
          result.resize(static_cast<size_t>(len));
      }
      va_end(ap_copy);
      return result;
  }
  /**
   * Copy a C string into a fixed-size buffer and always NUL-terminate it.
   *
   * At most @a n - 1 characters of @a src are kept; the last byte of the
   * buffer is always set to NUL. With @a n == 0 nothing is written.
   *
   * @param[out] dest destination buffer of at least @a n bytes
   * @param[in]  src  C string to copy
   * @param[in]  n    size of @a dest in bytes
   *
   * @return @a dest
   */
  char *kc_strlcpy(char *dest, const char *src, size_t n)
  {
      /* A zero-sized buffer has no room for the terminator; dest[n-1] would be out of bounds. */
      if (n == 0)
          return dest;
      strncpy(dest, src, n);
      dest[n-1] = '\0';
      return dest;
  }
  /**
   * Test whether a string begins with a given prefix (case sensitive).
   *
   * @param[in] full   string to inspect
   * @param[in] prefix expected beginning
   *
   * @return true when @a full starts with @a prefix
   */
  bool kc_starts_with(const std::string &full, const std::string &prefix)
  {
      if (prefix.size() > full.size())
          return false;
      return std::equal(prefix.begin(), prefix.end(), full.begin());
  }
  485. bool kc_istarts_with(const std::string &full, const std::string &needle)
  486. {
  487. return kc_starts_with(strToLower(full), strToLower(needle));
  488. }
  /**
   * Test whether a string ends with a given suffix (case sensitive).
   *
   * @param[in] full   string to inspect
   * @param[in] prefix expected ending (the parameter name is historical)
   *
   * @return true when @a full ends with @a prefix
   */
  bool kc_ends_with(const std::string &full, const std::string &prefix)
  {
      size_t fz = full.size(), pz = prefix.size();
      if (fz < pz)
          return false;
      /* compare() yields 0 on equality; returning it directly inverted the result. */
      return full.compare(fz - pz, pz, prefix) == 0;
  }
  /* The base64 alphabet; the position of a character is its 6-bit value. */
  static const std::string base64_chars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

  /**
   * Test whether a character is accepted as a base64 digit.
   *
   * @param[in] c character to test
   *
   * @return true for '+', '/' and anything isalnum() accepts
   */
  static inline bool is_base64(unsigned char c)
  {
      if (c == '+' || c == '/')
          return true;
      return isalnum(c);
  }
  501. std::string base64_encode(const unsigned char *bytes_to_encode, unsigned int in_len)
  502. {
  503. unsigned char char_array_3[3], char_array_4[4];
  504. int i = 0, j = 0;
  505. std::string ret;
  506. while (in_len--) {
  507. char_array_3[i++] = *(bytes_to_encode++);
  508. if (i != 3)
  509. continue;
  510. char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
  511. char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
  512. char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
  513. char_array_4[3] = char_array_3[2] & 0x3f;
  514. for (i = 0; i < 4; ++i)
  515. ret += base64_chars[char_array_4[i]];
  516. i = 0;
  517. }
  518. if (i == 0)
  519. return ret;
  520. for (j = i; j < 3; ++j)
  521. char_array_3[j] = '\0';
  522. char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
  523. char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
  524. char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
  525. char_array_4[3] = char_array_3[2] & 0x3f;
  526. for (j = 0; j < i + 1; ++j)
  527. ret += base64_chars[char_array_4[j]];
  528. while ((i++ < 3))
  529. ret += '=';
  530. return ret;
  531. }
  532. std::string base64_decode(const std::string &encoded_string)
  533. {
  534. int in_len = encoded_string.size(), i = 0, j = 0, in_ = 0;
  535. unsigned char char_array_4[4], char_array_3[3];
  536. std::string ret;
  537. while (in_len-- && encoded_string[in_] != '=' && is_base64(encoded_string[in_])) {
  538. char_array_4[i++] = encoded_string[in_++];
  539. if (i != 4)
  540. continue;
  541. for (i = 0; i < 4; ++i)
  542. char_array_4[i] = base64_chars.find(char_array_4[i]);
  543. char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
  544. char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
  545. char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
  546. for (i = 0; i < 3; ++i)
  547. ret += char_array_3[i];
  548. i = 0;
  549. }
  550. if (i == 0)
  551. return ret;
  552. for (j = i; j < 4; ++j)
  553. char_array_4[j] = 0;
  554. for (j = 0; j < 4; ++j)
  555. char_array_4[j] = base64_chars.find(char_array_4[j]);
  556. char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
  557. char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
  558. char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
  559. for (j = 0; j < i - 1; ++j)
  560. ret += char_array_3[j];
  561. return ret;
  562. }
  563. std::string zcp_md5_final_hex(MD5_CTX *ctx)
  564. {
  565. static const char hex[] = "0123456789abcdef";
  566. unsigned char md[MD5_DIGEST_LENGTH];
  567. std::string s;
  568. s.reserve(2 * sizeof(md));
  569. MD5_Final(md, ctx);
  570. for (size_t z = 0; z < sizeof(md); ++z) {
  571. s.push_back(hex[(md[z] & 0xF0) >> 4]);
  572. s.push_back(hex[md[z] & 0xF]);
  573. }
  574. return s;
  575. }
  /**
   * Return a copy of a string with all NUL characters removed.
   *
   * @param[in] i string that may contain embedded NUL characters
   *
   * @return the remaining characters in their original order
   */
  std::string string_strip_nuls(const std::string &i)
  {
      std::string o(i);
      o.erase(std::remove(o.begin(), o.end(), '\0'), o.end());
      return o;
  }
  /**
   * Return a copy of a wide string with all NUL characters removed.
   *
   * @param[in] i wide string that may contain embedded NUL characters
   *
   * @return the remaining characters in their original order
   */
  std::wstring string_strip_nuls(const std::wstring &i)
  {
      std::wstring o(i);
      o.erase(std::remove(o.begin(), o.end(), L'\0'), o.end());
      return o;
  }
  590. } /* namespace */