ustringutil.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142
  1. /*
  2. * Copyright 2005 - 2016 Zarafa and its licensors
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU Affero General Public License, version 3,
  6. * as published by the Free Software Foundation.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU Affero General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Affero General Public License
  14. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. *
  16. */
  17. /**
  18. @file
  19. Unicode String Utilities
  20. @defgroup ustringutil Unicode String Utilities
  21. @{
  22. The Unicode String Utilities provide some common string utilities aimed to be compliant with
  23. all (or at least most) of the Unicode quirks.
  24. The provided functions are:
  25. - str_equals, wcs_equals, u8_equals: Check if two strings are equal.
  26. - str_iequals, wcs_iequals, u8_iequals: Check if two strings are equal ignoring case.
  27. - str_startswith, wcs_startswith, u8_startswith: Check if one string starts with another.
  28. - str_istartswith, wcs_istartswith, u8_istartswith: Check if one string starts with another ignoring case.
  29. - str_icompare, wcs_icompare, u8_icompare: Compare two strings ignoring case.
  30. - str_contains, wcs_contains, u8_contains: Check if one string contains the other.
  31. - str_icontains, wcs_icontains, u8_icontains: Check if one string contains the other ignoring case.
  32. @par Normalization
  33. In order to compare Unicode strings, the data needs to be normalized first. This is needed because Unicode allows
  34. different binary representations of the same data. The functions provided in this module make no assumptions about
  35. the provided data and will always perform a normalization before doing a comparison.
  36. @par Case mapping
  37. The case insensitive functions need a way to match code points regardless of their case. ICU provides a few methods for
  38. this, but they use a method called case-folding to avoid the need for a locale (changing case is dependant on a locale).
  39. Since case-folding doesn't take a locale, it's a best guess method, which will produce wrong results in certain situations.
  40. The functions in this library apply a method called case-mapping, which basically means we perform a to-upper on all
  41. code-points with a provided locale.
  42. @par Collation
  43. The functions that try to match (sub)strings, have no interest in the order in which strings would appear if they would be
  44. sorted. However, the compare functions do produce a result that could be used for sorting. Since sorting is dependant on a
  45. locale as well, they would need a locale. However, ICU provides a Collator class that performs the actual comparison for a
  46. particular locale. Since we don't want to construct a Collator class for every string comparison, the string comparison
  47. functions take a Collator object as argument. This way the caller can reuse the Collator.
  48. @par Performance
  49. Performance of the current (21-05-2010) implementation is probably pretty bad. This is caused by all the conversions that are
  50. performed on the complete strings before the actual comparison is even started.
  51. At some point we need to rewrite these functions to do all the conversion on the fly to minimize processing.
  52. */
  53. #include "config.h"
  54. #include <kopano/platform.h>
  55. #include <kopano/ustringutil.h>
  56. #include <kopano/CommonUtil.h>
  57. #include "utf8/unchecked.h"
  58. #include <cassert>
  59. #include <memory>
  60. #include <unicode/unorm.h>
  61. #include <unicode/coll.h>
  62. #include <unicode/tblcoll.h>
  63. #include <unicode/coleitr.h>
  64. #include <unicode/normlzr.h>
  65. #include <unicode/ustring.h>
  66. #include "ustringutil/utfutil.h"
  67. typedef std::unique_ptr<Collator> unique_ptr_Collator;
  68. namespace KC {
  69. /**
  70. * US-ASCII version to find a case-insensitive string part in a
  71. * haystack.
  72. *
  73. * @param haystack search this haystack for a case-insensitive needle
  74. * @param needle search this needle in the case-insensitive haystack
  75. *
  76. * @return pointer where needle is found or NULL
  77. */
  78. const char* str_ifind(const char *haystack, const char *needle)
  79. {
  80. locale_t loc = createlocale(LC_CTYPE, "C");
  81. const char *needlepos = needle;
  82. const char *needlestart = haystack;
  83. while(*haystack) {
  84. if (toupper_l(*haystack, loc) == toupper_l(*needlepos, loc)) {
  85. ++needlepos;
  86. if(*needlepos == 0)
  87. goto exit;
  88. } else {
  89. haystack = needlestart++;
  90. needlepos = needle;
  91. }
  92. ++haystack;
  93. }
  94. needlestart = NULL;
  95. exit:
  96. freelocale(loc);
  97. return needlestart;
  98. }
  99. /**
  100. * Check if two strings are canonical equivalent.
  101. *
  102. * @param[in] s1 The string to compare s2 with.
  103. * @param[in] s2 The string to compare s1 with.
  104. * @param[in] locale The locale used to perform string collation.
  105. *
  106. * @return boolean
  107. * @retval true The strings are canonical equivalent
  108. * @retval false The strings are not canonical equivalent
  109. */
  110. bool str_equals(const char *s1, const char *s2, const ECLocale &locale)
  111. {
  112. assert(s1);
  113. assert(s2);
  114. UnicodeString a = StringToUnicode(s1);
  115. UnicodeString b = StringToUnicode(s2);
  116. return a.compare(b) == 0;
  117. }
  118. /**
  119. * Check if two strings are canonical equivalent when ignoring the case.
  120. *
  121. * @param[in] s1 The string to compare s2 with.
  122. * @param[in] s2 The string to compare s1 with.
  123. * @param[in] locale The locale used to convert the case of the strings.
  124. *
  125. * @return boolean
  126. * @retval true The strings are canonical equivalent
  127. * @retval false The strings are not canonical equivalent
  128. */
  129. bool str_iequals(const char *s1, const char *s2, const ECLocale &locale)
  130. {
  131. assert(s1);
  132. assert(s2);
  133. UnicodeString a = StringToUnicode(s1);
  134. UnicodeString b = StringToUnicode(s2);
  135. return a.caseCompare(b, 0) == 0;
  136. }
  137. /**
  138. * Check if the string s1 starts with s2.
  139. *
  140. * @param[in] s1 The string to compare s2 with.
  141. * @param[in] s2 The string to compare s1 with.
  142. * @param[in] locale The locale used to perform string collation.
  143. *
  144. * @return boolean
  145. * @retval true The strings are canonical equivalent
  146. * @retval false The strings are not canonical equivalent
  147. */
  148. bool str_startswith(const char *s1, const char *s2, const ECLocale &locale)
  149. {
  150. assert(s1);
  151. assert(s2);
  152. UnicodeString a = StringToUnicode(s1);
  153. UnicodeString b = StringToUnicode(s2);
  154. return a.compare(0, b.length(), b) == 0;
  155. }
  156. /**
  157. * Check if the string s1 starts with s2 when ignoring the case.
  158. *
  159. * @param[in] s1 The string to compare s2 with.
  160. * @param[in] s2 The string to compare s1 with.
  161. * @param[in] locale The locale used to convert the case of the strings.
  162. *
  163. * @return boolean
  164. * @retval true The strings are canonical equivalent
  165. * @retval false The strings are not canonical equivalent
  166. */
  167. bool str_istartswith(const char *s1, const char *s2, const ECLocale &locale)
  168. {
  169. assert(s1);
  170. assert(s2);
  171. UnicodeString a = StringToUnicode(s1);
  172. UnicodeString b = StringToUnicode(s2);
  173. return a.caseCompare(0, b.length(), b, 0) == 0;
  174. }
  175. /**
  176. * Compare two strings using the collator to determine the sort order.
  177. *
  178. * Both strings are expectes to be in the current locale. The comparison is
  179. * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
  180. * if the two strings are the same if the case is discarded. It doesn't effect the
  181. * sorting in any other way.
  182. *
  183. * @param[in] s1 The string to compare s2 with.
  184. * @param[in] s2 The string to compare s1 with.
  185. * @param[in] collator The collator used to determine which string precedes the other.
  186. *
  187. * @return An integer.
  188. * @retval -1 s1 is smaller than s2
  189. * @retval 0 s1 equals s2.
  190. * @retval 1 s1 is greater than s2
  191. */
  192. int str_icompare(const char *s1, const char *s2, const ECLocale &locale)
  193. {
  194. assert(s1);
  195. assert(s2);
  196. UErrorCode status = U_ZERO_ERROR;
  197. unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));
  198. UnicodeString a = StringToUnicode(s1);
  199. UnicodeString b = StringToUnicode(s2);
  200. a.foldCase();
  201. b.foldCase();
  202. return ptrCollator->compare(a,b,status);
  203. }
  204. /**
  205. * Find a string in another string.
  206. *
  207. * @param[in] haystack The string to search in
  208. * @param[in] needle The string to search for.
  209. * @param[in] locale The locale used to perform string collation.
  210. *
  211. * @return boolean
  212. * @retval true The needle was found
  213. * @retval false The needle wasn't found
  214. *
  215. * @note This function behaves different than strstr in that it returns a
  216. * a boolean instead of a pointer to the found substring. This is
  217. * because we search on a transformed string. Getting the correct
  218. * pointer would involve additional processing while we don't need
  219. * the result anyway.
  220. */
  221. bool str_contains(const char *haystack, const char *needle, const ECLocale &locale)
  222. {
  223. assert(haystack);
  224. assert(needle);
  225. UnicodeString a = StringToUnicode(haystack);
  226. UnicodeString b = StringToUnicode(needle);
  227. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  228. }
  229. /**
  230. * Find a string in another string while ignoreing case.
  231. *
  232. * @param[in] haystack The string to search in
  233. * @param[in] needle The string to search for.
  234. * @param[in] locale The locale used to convert the case of the strings.
  235. *
  236. * @return boolean
  237. * @retval true The needle was found
  238. * @retval false The needle wasn't found
  239. */
  240. bool str_icontains(const char *haystack, const char *needle, const ECLocale &locale)
  241. {
  242. assert(haystack);
  243. assert(needle);
  244. UnicodeString a = StringToUnicode(haystack);
  245. UnicodeString b = StringToUnicode(needle);
  246. a.foldCase();
  247. b.foldCase();
  248. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  249. }
  250. /**
  251. * Check if two strings are canonical equivalent.
  252. *
  253. * @param[in] s1 The string to compare s2 with.
  254. * @param[in] s2 The string to compare s1 with.
  255. * @param[in] locale The locale used to perform string collation.
  256. *
  257. * @return boolean
  258. * @retval true The strings are canonical equivalent
  259. * @retval false The strings are not canonical equivalent
  260. */
  261. bool wcs_equals(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
  262. {
  263. assert(s1);
  264. assert(s2);
  265. UnicodeString a = WCHARToUnicode(s1);
  266. UnicodeString b = WCHARToUnicode(s2);
  267. return a.compare(b) == 0;
  268. }
  269. /**
  270. * Check if two strings are canonical equivalent when ignoring the case.
  271. *
  272. * @param[in] s1 The string to compare s2 with.
  273. * @param[in] s2 The string to compare s1 with.
  274. * @param[in] locale The locale used to convert the case of the strings.
  275. *
  276. * @return boolean
  277. * @retval true The strings are canonical equivalent
  278. * @retval false The strings are not canonical equivalent
  279. */
  280. bool wcs_iequals(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
  281. {
  282. assert(s1);
  283. assert(s2);
  284. UnicodeString a = WCHARToUnicode(s1);
  285. UnicodeString b = WCHARToUnicode(s2);
  286. return a.caseCompare(b, 0) == 0;
  287. }
  288. /**
  289. * Check if s1 starts with s2.
  290. *
  291. * @param[in] s1 The string to compare s2 with.
  292. * @param[in] s2 The string to compare s1 with.
  293. * @param[in] locale The locale used to perform string collation.
  294. *
  295. * @return boolean
  296. * @retval true The strings are canonical equivalent
  297. * @retval false The strings are not canonical equivalent
  298. */
  299. bool wcs_startswith(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
  300. {
  301. assert(s1);
  302. assert(s2);
  303. UnicodeString a = WCHARToUnicode(s1);
  304. UnicodeString b = WCHARToUnicode(s2);
  305. return a.compare(0, b.length(), b) == 0;
  306. }
  307. /**
  308. * Check if s1 starts with s2 when ignoring the case.
  309. *
  310. * @param[in] s1 The string to compare s2 with.
  311. * @param[in] s2 The string to compare s1 with.
  312. * @param[in] locale The locale used to convert the case of the strings.
  313. *
  314. * @return boolean
  315. * @retval true The strings are canonical equivalent
  316. * @retval false The strings are not canonical equivalent
  317. */
  318. bool wcs_istartswith(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
  319. {
  320. assert(s1);
  321. assert(s2);
  322. UnicodeString a = WCHARToUnicode(s1);
  323. UnicodeString b = WCHARToUnicode(s2);
  324. return a.caseCompare(0, b.length(), b, 0) == 0;
  325. }
  326. /**
  327. * Compare two strings using the collator to determine the sort order.
  328. *
  329. * Both strings are expectes to be in the current locale. The comparison is
  330. * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
  331. * if the two strings are the same if the case is discarded. It doesn't effect the
  332. * sorting in any other way.
  333. *
  334. * @param[in] s1 The string to compare s2 with.
  335. * @param[in] s2 The string to compare s1 with.
  336. * @param[in] collator The collator used to determine which string precedes the other.
  337. *
  338. * @return An integer.
  339. * @retval -1 s1 is smaller than s2
  340. * @retval 0 s1 equals s2.
  341. * @retval 1 s1 is greater than s2
  342. */
  343. int wcs_icompare(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
  344. {
  345. assert(s1);
  346. assert(s2);
  347. UErrorCode status = U_ZERO_ERROR;
  348. unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));
  349. UnicodeString a = WCHARToUnicode(s1);
  350. UnicodeString b = WCHARToUnicode(s2);
  351. a.foldCase();
  352. b.foldCase();
  353. return ptrCollator->compare(a,b,status);
  354. }
  355. /**
  356. * Find a string in another string.
  357. *
  358. * @param[in] haystack The string to search in
  359. * @param[in] needle The string to search for.
  360. * @param[in] locale The locale used to perform string collation.
  361. *
  362. * @return boolean
  363. * @retval true The needle was found
  364. * @retval false The needle wasn't found
  365. *
  366. * @note This function behaves different than strstr in that it returns a
  367. * a boolean instead of a pointer to the found substring. This is
  368. * because we search on a transformed string. Getting the correct
  369. * pointer would involve additional processing while we don't need
  370. * the result anyway.
  371. */
  372. bool wcs_contains(const wchar_t *haystack, const wchar_t *needle, const ECLocale &locale)
  373. {
  374. assert(haystack);
  375. assert(needle);
  376. UnicodeString a = WCHARToUnicode(haystack);
  377. UnicodeString b = WCHARToUnicode(needle);
  378. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  379. }
  380. /**
  381. * Find a string in another string while ignoreing case.
  382. *
  383. * @param[in] haystack The string to search in
  384. * @param[in] needle The string to search for.
  385. * @param[in] locale The locale to use when converting case.
  386. *
  387. * @return boolean
  388. * @retval true The needle was found
  389. * @retval false The needle wasn't found
  390. *
  391. * @note This function behaves different than strstr in that it returns a
  392. * a boolean instead of a pointer to the found substring. This is
  393. * because we search on a transformed string. Getting the correct
  394. * pointer would involve additional processing while we don't need
  395. * the result anyway.
  396. */
  397. bool wcs_icontains(const wchar_t *haystack, const wchar_t *needle, const ECLocale &locale)
  398. {
  399. assert(haystack);
  400. assert(needle);
  401. UnicodeString a = WCHARToUnicode(haystack);
  402. UnicodeString b = WCHARToUnicode(needle);
  403. a.foldCase();
  404. b.foldCase();
  405. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  406. }
  407. /**
  408. * Check if two strings are canonical equivalent.
  409. *
  410. * @param[in] s1 The string to compare s2 with.
  411. * @param[in] s2 The string to compare s1 with.
  412. * @param[in] locale The locale used to perform string collation.
  413. *
  414. * @return boolean
  415. * @retval true The strings are canonical equivalent
  416. * @retval false The strings are not canonical equivalent
  417. */
  418. bool u8_equals(const char *s1, const char *s2, const ECLocale &locale)
  419. {
  420. assert(s1);
  421. assert(s2);
  422. UnicodeString a = UTF8ToUnicode(s1);
  423. UnicodeString b = UTF8ToUnicode(s2);
  424. return a.compare(b) == 0;
  425. }
  426. /**
  427. * Check if two strings are canonical equivalent when ignoring the case.
  428. *
  429. * @param[in] s1 The string to compare s2 with.
  430. * @param[in] s2 The string to compare s1 with.
  431. * @param[in] locale The locale to use when converting case.
  432. *
  433. * @return boolean
  434. * @retval true The strings are canonical equivalent
  435. * @retval false The strings are not canonical equivalent
  436. */
  437. bool u8_iequals(const char *s1, const char *s2, const ECLocale &locale)
  438. {
  439. assert(s1);
  440. assert(s2);
  441. UnicodeString a = UTF8ToUnicode(s1);
  442. UnicodeString b = UTF8ToUnicode(s2);
  443. return a.caseCompare(b, 0) == 0;
  444. }
  445. /**
  446. * Check if s1 starts with s2.
  447. *
  448. * @param[in] s1 The string to compare s2 with.
  449. * @param[in] s2 The string to compare s1 with.
  450. * @param[in] locale The locale used to perform string collation.
  451. *
  452. * @return boolean
  453. * @retval true The strings are canonical equivalent
  454. * @retval false The strings are not canonical equivalent
  455. */
  456. bool u8_startswith(const char *s1, const char *s2, const ECLocale &locale)
  457. {
  458. assert(s1);
  459. assert(s2);
  460. UnicodeString a = UTF8ToUnicode(s1);
  461. UnicodeString b = UTF8ToUnicode(s2);
  462. return a.compare(0, b.length(), b) == 0;
  463. }
  464. /**
  465. * Check if s1 starts with s2 when ignoring the case.
  466. *
  467. * @param[in] s1 The string to compare s2 with.
  468. * @param[in] s2 The string to compare s1 with.
  469. * @param[in] locale The locale to use when converting case.
  470. *
  471. * @return boolean
  472. * @retval true The strings are canonical equivalent
  473. * @retval false The strings are not canonical equivalent
  474. */
  475. bool u8_istartswith(const char *s1, const char *s2, const ECLocale &locale)
  476. {
  477. assert(s1);
  478. assert(s2);
  479. UnicodeString a = UTF8ToUnicode(s1);
  480. UnicodeString b = UTF8ToUnicode(s2);
  481. return a.caseCompare(0, b.length(), b, 0) == 0;
  482. }
  483. /**
  484. * Compare two strings using the collator to determine the sort order.
  485. *
  486. * Both strings are expectes to be encoded in UTF-8. The comparison is
  487. * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
  488. * if the two strings are the same if the case is discarded. It doesn't effect the
  489. * sorting in any other way.
  490. *
  491. * @param[in] s1 The string to compare s2 with.
  492. * @param[in] s2 The string to compare s1 with.
  493. * @param[in] collator The collator used to determine which string precedes the other.
  494. *
  495. * @return An integer.
  496. * @retval -1 s1 is smaller than s2
  497. * @retval 0 s1 equals s2.
  498. * @retval 1 s1 is greater than s2
  499. */
  500. int u8_icompare(const char *s1, const char *s2, const ECLocale &locale)
  501. {
  502. assert(s1);
  503. assert(s2);
  504. UErrorCode status = U_ZERO_ERROR;
  505. unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));
  506. UnicodeString a = UTF8ToUnicode(s1);
  507. UnicodeString b = UTF8ToUnicode(s2);
  508. a.foldCase();
  509. b.foldCase();
  510. return ptrCollator->compare(a,b,status);
  511. }
  512. /**
  513. * Find a string in another string.
  514. *
  515. * @param[in] haystack The string to search in
  516. * @param[in] needle The string to search for.
  517. * @param[in] locale The locale used to perform string collation.
  518. *
  519. * @return boolean
  520. * @retval true The needle was found
  521. * @retval false The needle wasn't found
  522. *
  523. * @note This function behaves different than strstr in that it returns a
  524. * a boolean instead of a pointer to the found substring. This is
  525. * because we search on a transformed string. Getting the correct
  526. * pointer would involve additional processing while we don't need
  527. * the result anyway.
  528. */
  529. bool u8_contains(const char *haystack, const char *needle, const ECLocale &locale)
  530. {
  531. assert(haystack);
  532. assert(needle);
  533. UnicodeString a = UTF8ToUnicode(haystack);
  534. UnicodeString b = UTF8ToUnicode(needle);
  535. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  536. }
  537. /**
  538. * Find a string in another string while ignoreing case.
  539. *
  540. * @param[in] haystack The string to search in
  541. * @param[in] needle The string to search for.
  542. * @param[in] locale The locale to use when converting case.
  543. *
  544. * @return boolean
  545. * @retval true The needle was found
  546. * @retval false The needle wasn't found
  547. */
  548. bool u8_icontains(const char *haystack, const char *needle, const ECLocale &locale)
  549. {
  550. assert(haystack);
  551. assert(needle);
  552. UnicodeString a = UTF8ToUnicode(haystack);
  553. UnicodeString b = UTF8ToUnicode(needle);
  554. a.foldCase();
  555. b.foldCase();
  556. return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
  557. }
  558. /**
  559. * Copy at most n characters from the utf8 string src to lpstrDest.
  560. *
  561. * @param[in] src The UTF-8 source data to copy
  562. * @param[in] n The maximum amount of characters to copy
  563. * @param[out] lpstrDest The copied data.
  564. *
  565. * @return The amount of characters copied.
  566. */
  567. unsigned u8_ncpy(const char *src, unsigned n, std::string *lpstrDest)
  568. {
  569. const char *it = src;
  570. unsigned len = 0;
  571. while (true) {
  572. const char *tmp = it;
  573. utf8::uint32_t cp = utf8::unchecked::next(tmp);
  574. if (cp == 0)
  575. break;
  576. it = tmp;
  577. if (++len == n)
  578. break;
  579. }
  580. lpstrDest->assign(src, it);
  581. return len;
  582. }
  583. /**
  584. * Returns the length in bytes of the string s when capped to a maximum of
  585. * max characters.
  586. *
  587. * @param[in] s The UTF-8 string to process
  588. * @param[in] max The maximum amount of characters for which to return
  589. * the length in bytes.
  590. *
  591. * @return The length in bytes of the capped string.
  592. */
  593. unsigned u8_cappedbytes(const char *s, unsigned max)
  594. {
  595. const char *it = s;
  596. unsigned len = 0;
  597. while (true) {
  598. const char *tmp = it;
  599. utf8::uint32_t cp = utf8::unchecked::next(tmp);
  600. if (cp == 0)
  601. break;
  602. it = tmp;
  603. if (++len == max)
  604. break;
  605. }
  606. return unsigned(it - s);
  607. }
  608. /**
  609. * Returns the length in characters of the passed UTF-8 string s
  610. *
  611. * @param[in] s The UTF-8 string to get length of.
  612. *
  613. * @return The length in characters of string s
  614. */
  615. unsigned u8_len(const char *s)
  616. {
  617. unsigned len = 0;
  618. while (true) {
  619. utf8::uint32_t cp = utf8::unchecked::next(s);
  620. if (cp == 0)
  621. break;
  622. ++len;
  623. }
  624. return len;
  625. }
  626. static const struct localemap {
  627. const char *lpszLocaleID; /*< Posix locale id */
  628. ULONG ulLCID; /*< Windows LCID */
  629. const char *lpszLocaleName; /*< Windows locale name */
  630. } localeMap[] = {
  631. {"af",54,"Afrikaans_South Africa"},
  632. {"af_NA",54,"Afrikaans_South Africa"},
  633. {"af_ZA",1078,"Afrikaans_South Africa"},
  634. {"ar",1,"Arabic_Saudi Arabia"},
  635. {"ar_BH",15361,"Arabic_Bahrain"},
  636. {"ar_DZ",5121,"Arabic_Algeria"},
  637. {"ar_EG",3073,"Arabic_Egypt"},
  638. {"ar_IQ",2049,"Arabic_Iraq"},
  639. {"ar_JO",11265,"Arabic_Jordan"},
  640. {"ar_KW",13313,"Arabic_Kuwait"},
  641. {"ar_LB",12289,"Arabic_Lebanon"},
  642. {"ar_LY",4097,"Arabic_Libya"},
  643. {"ar_MA",6145,"Arabic_Morocco"},
  644. {"ar_OM",8193,"Arabic_Oman"},
  645. {"ar_QA",16385,"Arabic_Qatar"},
  646. {"ar_SA",1025,"Arabic_Saudi Arabia"},
  647. {"ar_SD",1,"Arabic_Saudi Arabia"},
  648. {"ar_SY",10241,"Arabic_Syria"},
  649. {"ar_TN",7169,"Arabic_Tunisia"},
  650. {"ar_YE",9217,"Arabic_Yemen"},
  651. {"az",44,"Azeri (Latin)_Azerbaijan"},
  652. {"az_Cyrl_AZ",2092,"Azeri (Cyrillic)_Azerbaijan"},
  653. {"az_Latn_AZ",1068,"Azeri (Latin)_Azerbaijan"},
  654. {"be",35,"Belarusian_Belarus"},
  655. {"be_BY",1059,"Belarusian_Belarus"},
  656. {"bg",2,"Bulgarian_Bulgaria"},
  657. {"bg_BG",1026,"Bulgarian_Bulgaria"},
  658. {"ca",3,"Catalan_Spain"},
  659. {"ca_ES",1027,"Catalan_Spain"},
  660. {"cs",5,"Czech_Czech Republic"},
  661. {"cs_CZ",1029,"Czech_Czech Republic"},
  662. {"cy",82,"Welsh_United Kingdom"},
  663. {"cy_GB",1106,"Welsh_United Kingdom"},
  664. {"da",6,"Danish_Denmark"},
  665. {"da_DK",1030,"Danish_Denmark"},
  666. {"de",7,"German_Germany"},
  667. {"de_AT",3079,"German_Austria"},
  668. {"de_BE",7,"German_Germany"},
  669. {"de_CH",2055,"German_Switzerland"},
  670. {"de_DE",1031,"German_Germany"},
  671. {"de_LI",5127,"German_Liechtenstein"},
  672. {"de_LU",4103,"German_Luxembourg"},
  673. {"el",8,"Greek_Greece"},
  674. {"el_CY",8,"Greek_Greece"},
  675. {"el_GR",1032,"Greek_Greece"},
  676. {"en",9,"English_United States"},
  677. {"en_AU",3081,"English_Australia"},
  678. {"en_BE",9,"English_United States"},
  679. {"en_BW",9,"English_United States"},
  680. {"en_BZ",10249,"English_Belize"},
  681. {"en_CA",4105,"English_Canada"},
  682. {"en_GB",2057,"English_United Kingdom"},
  683. {"en_HK",9,"English_United States"},
  684. {"en_IE",6153,"English_Ireland"},
  685. {"en_JM",8201,"English_Jamaica"},
  686. {"en_MH",1033,"English_United States"},
  687. {"en_MT",9,"English_United States"},
  688. {"en_MU",9,"English_United States"},
  689. {"en_NA",9,"English_United States"},
  690. {"en_NZ",5129,"English_New Zealand"},
  691. {"en_PH",13321,"English_Republic of the Philippines"},
  692. {"en_PK",9,"English_United States"},
  693. {"en_TT",11273,"English_Trinidad and Tobago"},
  694. {"en_US",1033,"English_United States"},
  695. {"en_VI",9225,"English_Caribbean"},
  696. {"en_ZA",7177,"English_South Africa"},
  697. {"en_ZW",12297,"English_Zimbabwe"},
  698. {"es",10,"Spanish_Spain"},
  699. {"es_AR",11274,"Spanish_Argentina"},
  700. {"es_BO",16394,"Spanish_Bolivia"},
  701. {"es_CL",13322,"Spanish_Chile"},
  702. {"es_CO",9226,"Spanish_Colombia"},
  703. {"es_CR",5130,"Spanish_Costa Rica"},
  704. {"es_DO",7178,"Spanish_Dominican Republic"},
  705. {"es_EC",12298,"Spanish_Ecuador"},
  706. {"es_ES",3082,"Spanish_Spain"},
  707. {"es_GQ",10,"Spanish_Spain"},
  708. {"es_GT",4106,"Spanish_Guatemala"},
  709. {"es_HN",18442,"Spanish_Honduras"},
  710. {"es_MX",2058,"Spanish_Mexico"},
  711. {"es_NI",19466,"Spanish_Nicaragua"},
  712. {"es_PA",6154,"Spanish_Panama"},
  713. {"es_PE",10250,"Spanish_Peru"},
  714. {"es_PR",20490,"Spanish_Puerto Rico"},
  715. {"es_PY",15370,"Spanish_Paraguay"},
  716. {"es_SV",17418,"Spanish_El Salvador"},
  717. {"es_UY",14346,"Spanish_Uruguay"},
  718. {"es_VE",8202,"Spanish_Venezuela"},
  719. {"et",37,"Estonian_Estonia"},
  720. {"et_EE",1061,"Estonian_Estonia"},
  721. {"eu",45,"Basque_Spain"},
  722. {"eu_ES",1069,"Basque_Spain"},
  723. {"fa",41,"Farsi_Iran"},
  724. {"fa_IR",1065,"Farsi_Iran"},
  725. {"fi",11,"Finnish_Finland"},
  726. {"fi_FI",1035,"Finnish_Finland"},
  727. {"fil",100,"Filipino_Philippines"},
  728. {"fil_PH",1124,"Filipino_Philippines"},
  729. {"fo",56,"Faroese_Faroe Islands"},
  730. {"fo_FO",1080,"Faroese_Faroe Islands"},
  731. {"fr",12,"French_France"},
  732. {"fr_BE",2060,"French_Belgium"},
  733. {"fr_BL",12,"French_France"},
  734. {"fr_CA",3084,"French_Canada"},
  735. {"fr_CF",12,"French_France"},
  736. {"fr_CH",4108,"French_Switzerland"},
  737. {"fr_FR",1036,"French_France"},
  738. {"fr_GN",12,"French_France"},
  739. {"fr_GP",12,"French_France"},
  740. {"fr_LU",5132,"French_Luxembourg"},
  741. {"fr_MC",6156,"French_Principality of Monaco"},
  742. {"fr_MF",12,"French_France"},
  743. {"fr_MG",12,"French_France"},
  744. {"fr_MQ",12,"French_France"},
  745. {"fr_NE",12,"French_France"},
  746. {"ga_IE",2108,"Irish_Ireland"},
  747. {"gl",86,"Galician_Spain"},
  748. {"gl_ES",1110,"Galician_Spain"},
  749. {"gu",71,"Gujarati_India"},
  750. {"gu_IN",1095,"Gujarati_India"},
  751. {"he",13,"Hebrew_Israel"},
  752. {"he_IL",1037,"Hebrew_Israel"},
  753. {"hi",57,"Hindi_India"},
  754. {"hi_IN",1081,"Hindi_India"},
  755. {"hr",26,"Croatian_Croatia"},
  756. {"hr_HR",1050,"Croatian_Croatia"},
  757. {"hu",14,"Hungarian_Hungary"},
  758. {"hu_HU",1038,"Hungarian_Hungary"},
  759. {"hy",43,"Armenian_Armenia"},
  760. {"hy_AM",1067,"Armenian_Armenia"},
  761. {"id",33,"Indonesian_Indonesia"},
  762. {"id_ID",1057,"Indonesian_Indonesia"},
  763. {"is",15,"Icelandic_Iceland"},
  764. {"is_IS",1039,"Icelandic_Iceland"},
  765. {"it",16,"Italian_Italy"},
  766. {"it_CH",2064,"Italian_Switzerland"},
  767. {"it_IT",1040,"Italian_Italy"},
  768. {"ja",17,"Japanese_Japan"},
  769. {"ja_JP",1041,"Japanese_Japan"},
  770. {"ka",55,"Georgian_Georgia"},
  771. {"ka_GE",1079,"Georgian_Georgia"},
  772. {"kk",63,"Kazakh_Kazakhstan"},
  773. {"kk_Cyrl",63,"Kazakh_Kazakhstan"},
  774. {"kk_Cyrl_KZ",63,"Kazakh_Kazakhstan"},
  775. {"kn",75,"Kannada_India"},
  776. {"kn_IN",1099,"Kannada_India"},
  777. {"ko",18,"Korean_Korea"},
  778. {"ko_KR",1042,"Korean_Korea"},
  779. {"kok",87,"Konkani_India"},
  780. {"kok_IN",1111,"Konkani_India"},
  781. {"lt",39,"Lithuanian_Lithuania"},
  782. {"lt_LT",1063,"Lithuanian_Lithuania"},
  783. {"lv",38,"Latvian_Latvia"},
  784. {"lv_LV",1062,"Latvian_Latvia"},
  785. {"mk",47,"FYRO Macedonian_Former Yugoslav Republic of Macedonia"},
  786. {"mk_MK",1071,"FYRO Macedonian_Former Yugoslav Republic of Macedonia"},
  787. {"mr",78,"Marathi_India"},
  788. {"mr_IN",1102,"Marathi_India"},
  789. {"ms",62,"Malay_Malaysia"},
  790. {"ms_BN",2110,"Malay_Brunei Darussalam"},
  791. {"ms_MY",1086,"Malay_Malaysia"},
  792. {"mt",58,"Maltese_Malta"},
  793. {"mt_MT",1082,"Maltese_Malta"},
  794. {"nb_NO",1044,"Norwegian_Norway"},
  795. {"ne",97,"Nepali_Nepal"},
  796. {"ne_NP",1121,"Nepali_Nepal"},
  797. {"nl",19,"Dutch_Netherlands"},
  798. {"nl_BE",2067,"Dutch_Belgium"},
  799. {"nl_NL",1043,"Dutch_Netherlands"},
  800. {"nn_NO",2068,"Norwegian (Nynorsk)_Norway"},
  801. {"pa",70,"Punjabi_India"},
  802. {"pa_Arab",70,"Punjabi_India"},
  803. {"pa_Arab_PK",70,"Punjabi_India"},
  804. {"pa_Guru",70,"Punjabi_India"},
  805. {"pa_Guru_IN",70,"Punjabi_India"},
  806. {"pl",21,"Polish_Poland"},
  807. {"pl_PL",1045,"Polish_Poland"},
  808. {"ps",99,"Pashto_Afghanistan"},
  809. {"ps_AF",1123,"Pashto_Afghanistan"},
  810. {"pt",22,"Portuguese_Brazil"},
  811. {"pt_BR",1046,"Portuguese_Brazil"},
  812. {"pt_GW",22,"Portuguese_Brazil"},
  813. {"pt_MZ",22,"Portuguese_Brazil"},
  814. {"pt_PT",2070,"Portuguese_Portugal"},
  815. {"rm",23,"Romansh_Switzerland"},
  816. {"rm_CH",1047,"Romansh_Switzerland"},
  817. {"ro",24,"Romanian_Romania"},
  818. {"ro_MD",24,"Romanian_Romania"},
  819. {"ro_RO",1048,"Romanian_Romania"},
  820. {"ru",25,"Russian_Russia"},
  821. {"ru_MD",25,"Russian_Russia"},
  822. {"ru_RU",1049,"Russian_Russia"},
  823. {"ru_UA",25,"Russian_Russia"},
  824. {"sk",27,"Slovak_Slovakia"},
  825. {"sk_SK",1051,"Slovak_Slovakia"},
  826. {"sl",36,"Slovenian_Slovenia"},
  827. {"sl_SI",1060,"Slovenian_Slovenia"},
  828. {"sq",28,"Albanian_Albania"},
  829. {"sq_AL",1052,"Albanian_Albania"},
  830. {"sr_Cyrl_BA",7194,"Serbian (Cyrillic)_Bosnia and Herzegovina"},
  831. {"sr_Latn_BA",6170,"Serbian (Latin)_Bosnia and Herzegovina"},
  832. {"sv",29,"Swedish_Sweden"},
  833. {"sv_FI",2077,"Swedish_Finland"},
  834. {"sv_SE",1053,"Swedish_Sweden"},
  835. {"sw",65,"Swahili_Kenya"},
  836. {"sw_KE",1089,"Swahili_Kenya"},
  837. {"sw_TZ",65,"Swahili_Kenya"},
  838. {"ta",73,"Tamil_India"},
  839. {"ta_IN",1097,"Tamil_India"},
  840. {"ta_LK",73,"Tamil_India"},
  841. {"te",74,"Telugu_India"},
  842. {"te_IN",1098,"Telugu_India"},
  843. {"th",30,"Thai_Thailand"},
  844. {"th_TH",1054,"Thai_Thailand"},
  845. {"tr",31,"Turkish_Turkey"},
  846. {"tr_TR",1055,"Turkish_Turkey"},
  847. {"uk",34,"Ukrainian_Ukraine"},
  848. {"uk_UA",1058,"Ukrainian_Ukraine"},
  849. {"ur",32,"Urdu_Islamic Republic of Pakistan"},
  850. {"ur_PK",1056,"Urdu_Islamic Republic of Pakistan"},
  851. {"uz",67,"Uzbek (Latin)_Uzbekistan"},
  852. {"uz_Arab",67,"Uzbek (Latin)_Uzbekistan"},
  853. {"uz_Arab_AF",67,"Uzbek (Latin)_Uzbekistan"},
  854. {"uz_Cyrl_UZ",2115,"Uzbek (Cyrillic)_Uzbekistan"},
  855. {"uz_Latn_UZ",1091,"Uzbek (Latin)_Uzbekistan"},
  856. {"vi",42,"Vietnamese_Viet Nam"},
  857. {"vi_VN",1066,"Vietnamese_Viet Nam"},
  858. {"zh_Hans",4,"Chinese_Taiwan"},
  859. {"zh_Hans_CN",2052,"Chinese_People's Republic of China"},
  860. {"zh_Hans_HK",4,"Chinese_Taiwan"},
  861. {"zh_Hans_MO",4,"Chinese_Taiwan"},
  862. {"zh_Hans_SG",4100,"Chinese_Singapore"},
  863. {"zh_Hant_TW",1028,"Chinese_Taiwan"},
  864. {"zu",53,"Zulu_South Africa"},
  865. {"zu_ZA",1077,"Zulu_South Africa"},
  866. };
  867. ECLocale createLocaleFromName(const char *lpszLocale)
  868. {
  869. return Locale::createFromName(lpszLocale);
  870. }
  871. ECRESULT LocaleIdToLCID(const char *lpszLocaleID, ULONG *lpulLcid)
  872. {
  873. const struct localemap *lpMapEntry = NULL;
  874. assert(lpszLocaleID != NULL);
  875. assert(lpulLcid != NULL);
  876. for (size_t i = 0; lpMapEntry == nullptr && i < ARRAY_SIZE(localeMap); ++i)
  877. if (strcasecmp(localeMap[i].lpszLocaleID, lpszLocaleID) == 0)
  878. lpMapEntry = &localeMap[i];
  879. if (lpMapEntry == NULL)
  880. return KCERR_NOT_FOUND;
  881. *lpulLcid = lpMapEntry->ulLCID;
  882. return erSuccess;
  883. }
  884. ECRESULT LCIDToLocaleId(ULONG ulLcid, const char **lppszLocaleID)
  885. {
  886. const struct localemap *lpMapEntry = NULL;
  887. assert(lppszLocaleID != NULL);
  888. for (size_t i = 0; lpMapEntry == nullptr && i < ARRAY_SIZE(localeMap); ++i)
  889. if (localeMap[i].ulLCID == ulLcid)
  890. lpMapEntry = &localeMap[i];
  891. if (lpMapEntry == NULL)
  892. return KCERR_NOT_FOUND;
  893. *lppszLocaleID = lpMapEntry->lpszLocaleID;
  894. return erSuccess;
  895. }
  896. /**
  897. * Create a locale independant blob that can be used to sort
  898. * strings fast. This is used when a string would be compared
  899. * multiple times.
  900. *
  901. * @param[in] s The string to compare.
  902. * @param[in] nCap Base the key on the first nCap characters of s (if larger than 0).
  903. * @param[in] locale The locale used to create the sort key.
  904. *
  905. * @returns ECSortKey object containing the blob
  906. */
  907. static ECSortKey createSortKey(UnicodeString s, int nCap,
  908. const ECLocale &locale)
  909. {
  910. if (nCap > 1)
  911. s.truncate(nCap);
  912. // Quick workaround for sorting items starting with ' (like From and To) and ( and '(
  913. if (s.startsWith("'") || s.startsWith("("))
  914. s.remove(0, 1);
  915. CollationKey key;
  916. UErrorCode status = U_ZERO_ERROR;
  917. unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));
  918. ptrCollator->getCollationKey(s, key, status); // Create a collation key for sorting
  919. return key;
  920. }
  921. /**
  922. * Create a locale independant blob that can be used to sort
  923. * strings fast. This is used when a string would be compared
  924. * multiple times.
  925. *
  926. * @param[in] s The string to compare.
  927. * @param[in] nCap Base the key on the first nCap characters of s (if larger than 0).
  928. * @param[in] locale The locale used to create the sort key.
  929. * @param[out] lpcbKeys The size in bytes of the returned key.
  930. * @param[ou]t lppKey The returned key.
  931. */
  932. static void createSortKeyData(const UnicodeString &s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
  933. {
  934. unsigned char *lpKey = NULL;
  935. CollationKey key = createSortKey(s, nCap, locale);
  936. int32_t cbKeyData = 0;
  937. const uint8_t *lpKeyData = key.getByteArray(cbKeyData);
  938. lpKey = new unsigned char[cbKeyData];
  939. memcpy(lpKey, lpKeyData, cbKeyData);
  940. *lpcbKey = cbKeyData;
  941. *lppKey = lpKey;
  942. }
  943. /**
  944. * Create a locale independant blob that can be used to sort
  945. * strings fast. This is used when a string would be compared
  946. * multiple times.
  947. *
  948. * @param[in] s The string to compare.
  949. * @param[in] nCap Base the key on the first nCap characters of s (if larger than 0).
  950. * @param[in] locale The locale used to create the sort key.
  951. * @param[out] lpcbKeys The size in bytes of the returned key.
  952. * @param[ou]t lppKey The returned key.
  953. */
  954. void createSortKeyData(const char *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
  955. {
  956. assert(s != NULL);
  957. assert(lpcbKey != NULL);
  958. assert(lppKey != NULL);
  959. createSortKeyData(UnicodeString(s), nCap, locale, lpcbKey, lppKey);
  960. }
  961. /**
  962. * Create a locale independant blob that can be used to sort
  963. * strings fast. This is used when a string would be compared
  964. * multiple times.
  965. *
  966. * @param[in] s The string to compare.
  967. * @param[in] locale The locale used to create the sort key.
  968. * @param[out] lpcbKeys The size in bytes of the returned key.
  969. * @param[ou]t lppKey The returned key.
  970. */
  971. void createSortKeyData(const wchar_t *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
  972. {
  973. assert(s != NULL);
  974. assert(lpcbKey != NULL);
  975. assert(lppKey != NULL);
  976. UnicodeString ustring;
  977. ustring = UTF32ToUnicode((const UChar32*)s);
  978. createSortKeyData(ustring, nCap, locale, lpcbKey, lppKey);
  979. }
  980. /**
  981. * Create a locale independant blob that can be used to sort
  982. * strings fast. This is used when a string would be compared
  983. * multiple times.
  984. *
  985. * @param[in] s The string to compare.
  986. * @param[in] nCap Base the key on the first nCap characters of s (if larger than 0).
  987. * @param[in] locale The locale used to create the sort key.
  988. * @param[out] lpcbKeys The size in bytes of the returned key.
  989. * @param[ou]t lppKey The returned key.
  990. */
  991. void createSortKeyDataFromUTF8(const char *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
  992. {
  993. assert(s != NULL);
  994. assert(lpcbKey != NULL);
  995. assert(lppKey != NULL);
  996. createSortKeyData(UTF8ToUnicode(s), nCap, locale, lpcbKey, lppKey);
  997. }
  998. /**
  999. * Create a locale independant blob that can be used to sort
  1000. * strings fast. This is used when a string would be compared
  1001. * multiple times.
  1002. *
  1003. * @param[in] s The string to compare.
  1004. * @param[in] nCap Base the key on the first nCap characters of s (if larger than 0).
  1005. * @param[in] locale The locale used to create the sort key.
  1006. *
  1007. * @returns The ECSortKey containing the blob.
  1008. */
  1009. ECSortKey createSortKeyFromUTF8(const char *s, int nCap, const ECLocale &locale)
  1010. {
  1011. assert(s != NULL);
  1012. return createSortKey(UTF8ToUnicode(s), nCap, locale);
  1013. }
  1014. /**
  1015. * Compare two sort keys previously created with createSortKey.
  1016. *
  1017. * @param[in] cbKey1 The size i nbytes of key 1.
  1018. * @param[in] lpKey1 Key 1.
  1019. * @param[in] cbKey2 The size i nbytes of key 2.
  1020. * @param[in] lpKey2 Key 2.
  1021. *
  1022. * @retval <0 Key1 is smaller than key2
  1023. * @retval 0 Key1 equals key2
  1024. * @retval >0 Key1 is greater than key2
  1025. */
  1026. int compareSortKeys(unsigned int cbKey1, const unsigned char *lpKey1, unsigned int cbKey2, const unsigned char *lpKey2)
  1027. {
  1028. assert(!(cbKey1 != 0 && lpKey1 == NULL));
  1029. assert(!(cbKey2 != 0 && lpKey2 == NULL));
  1030. CollationKey ckA(lpKey1, cbKey1);
  1031. CollationKey ckB(lpKey2, cbKey2);
  1032. int cmp = 1;
  1033. UErrorCode status = U_ZERO_ERROR;
  1034. switch (ckA.compareTo(ckB, status)) {
  1035. case UCOL_LESS: cmp = -1; break;
  1036. case UCOL_EQUAL: cmp = 0; break;
  1037. case UCOL_GREATER: cmp = 1; break;
  1038. }
  1039. return cmp;
  1040. }
  1041. } /* namespace */
  1042. /** @} */