hp
/
kopano-core


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
0170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142
							/*
 * Copyright 2005 - 2016 Zarafa and its licensors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

/**
@file
Unicode String Utilities

@defgroup ustringutil Unicode String Utilities
@{

The Unicode String Utilities provide some common string utilities aimed to be compliant with
all (or at least most) of the Unicode quirks.

The provided functions are:
  - str_equals, wcs_equals, u8_equals: Check if two strings are equal.
  - str_iequals, wcs_iequals, u8_iequals: Check if two strings are equal ignoring case.
  - str_startswith, wcs_startswith, u8_startswith: Check if one string starts with another.
  - str_istartswith, wcs_istartswith, u8_istartswith: Check if one string starts with another ignoring case.
  - str_icompare, wcs_icompare, u8_icompare: Compare two strings ignoring case.
  - str_contains, wcs_contains, u8_contains: Check if one string contains the other.
  - str_icontains, wcs_icontains, u8_icontains: Check if one string contains the other ignoring case.

@par Normalization
In order to compare Unicode strings, the data needs to be normalized first. This is needed because Unicode allows
different binary representations of the same data. The functions provided in this module make no assumptions about
the provided data and will always perform a normalization before doing a comparison.

@par Case mapping
The case insensitive functions need a way to match code points regardless of their case. ICU provides a few methods for
this, but they use a method called case-folding to avoid the need for a locale (changing case is dependent on a locale).
Since case-folding doesn't take a locale, it's a best guess method, which will produce wrong results in certain situations.
The functions in this library apply a method called case-mapping, which basically means we perform a to-upper on all
code-points with a provided locale.

@par Collation
The functions that try to match (sub)strings, have no interest in the order in which strings would appear if they would be
sorted. However, the compare functions do produce a result that could be used for sorting. Since sorting is dependent on a
locale as well, they would need a locale. However, ICU provides a Collator class that performs the actual comparison for a
particular locale. Since we don't want to construct a Collator class for every string comparison, the string comparison
functions take a Collator object as argument. This way the caller can reuse the Collator.

@par Performance
Performance of the current (21-05-2010) implementation is probably pretty bad. This is caused by all the conversions that are
performed on the complete strings before the actual comparison is even started.

At some point we need to rewrite these functions to do all the conversion on the fly to minimize processing.
*/

#include "config.h"
#include <kopano/platform.h>
#include <kopano/ustringutil.h>
#include <kopano/CommonUtil.h>
#include "utf8/unchecked.h"
#include <cassert>
#include <memory>
#include <unicode/unorm.h>
#include <unicode/coll.h>
#include <unicode/tblcoll.h>
#include <unicode/coleitr.h>
#include <unicode/normlzr.h>
#include <unicode/ustring.h>

#include "ustringutil/utfutil.h"

/* Owning pointer type used for Collator objects obtained from Collator::createInstance(). */
using unique_ptr_Collator = std::unique_ptr<Collator>;

namespace KC {

/**
 * US-ASCII case-insensitive substring search.
 *
 * Characters are compared after upper-casing them with toupper_l() in the
 * "C" locale, so only ASCII letters are treated case-insensitively.
 *
 * @param haystack	The string to search in.
 * @param needle	The string to search for.
 *
 * @return Pointer to the first occurrence of needle inside haystack, or NULL
 *         when there is none. An empty needle is never found (NULL).
 */
const char* str_ifind(const char *haystack, const char *needle)
{
	locale_t loc = createlocale(LC_CTYPE, "C");
	const char *needlepos = needle;
	const char *needlestart = haystack;

	while (*haystack) {
		/*
		 * toupper_l() is only defined for values representable as
		 * unsigned char (or EOF); a plain char may be negative for
		 * bytes >= 0x80, so convert explicitly.
		 */
		const int hc = toupper_l(static_cast<unsigned char>(*haystack), loc);
		const int nc = toupper_l(static_cast<unsigned char>(*needlepos), loc);

		if (hc == nc) {
			++needlepos;

			if (*needlepos == 0)
				goto exit;	/* complete needle matched at needlestart */
		} else {
			/* Restart the scan one position after the previous candidate. */
			haystack = needlestart++;
			needlepos = needle;
		}

		++haystack;
	}
	needlestart = NULL;

exit:
	freelocale(loc);

	return needlestart;
}

/**
 * Check if two strings are canonical equivalent.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool str_equals(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = StringToUnicode(s1);
    UnicodeString b = StringToUnicode(s2);

    return a.compare(b) == 0;
}

/**
 * Check if two strings are canonical equivalent when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to convert the case of the strings.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool str_iequals(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = StringToUnicode(s1);
    UnicodeString b = StringToUnicode(s2);

    return a.caseCompare(b, 0) == 0;
}

/**
 * Check if the string s1 starts with s2.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool str_startswith(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = StringToUnicode(s1);
    UnicodeString b = StringToUnicode(s2);

    return a.compare(0, b.length(), b) == 0;
}

/**
 * Check if the string s1 starts with s2 when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to convert the case of the strings.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool str_istartswith(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = StringToUnicode(s1);
    UnicodeString b = StringToUnicode(s2);

    return a.caseCompare(0, b.length(), b, 0) == 0;
}

/**
 * Compare two strings using the collator to determine the sort order.
 * 
 * Both strings are expectes to be in the current locale. The comparison is
 * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
 * if the two strings are the same if the case is discarded. It doesn't effect the
 * sorting in any other way.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	collator	The collator used to determine which string precedes the other.
 * 
 * @return		An integer.
 * @retval		-1	s1 is smaller than s2
 * @retval		0	s1 equals s2.
 * @retval		1	s1 is greater than s2
 */
int str_icompare(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
	UErrorCode status = U_ZERO_ERROR;
	unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));

	UnicodeString a = StringToUnicode(s1);
	UnicodeString b = StringToUnicode(s2);

	a.foldCase();
	b.foldCase();

	return ptrCollator->compare(a,b,status);
}

/**
 * Find a string in another string.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale used to perform string collation.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 *
 * @note This function behaves different than strstr in that it returns a
 *       a boolean instead of a pointer to the found substring. This is
 *       because we search on a transformed string. Getting the correct
 *       pointer would involve additional processing while we don't need
 *       the result anyway.
 */
bool str_contains(const char *haystack, const char *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = StringToUnicode(haystack);
    UnicodeString b = StringToUnicode(needle);

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Find a string in another string while ignoreing case.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale used to convert the case of the strings.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 */
bool str_icontains(const char *haystack, const char *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = StringToUnicode(haystack);
    UnicodeString b = StringToUnicode(needle);

    a.foldCase();
    b.foldCase();

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Check if two strings are canonical equivalent.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool wcs_equals(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = WCHARToUnicode(s1);
    UnicodeString b = WCHARToUnicode(s2);

    return a.compare(b) == 0;
}

/**
 * Check if two strings are canonical equivalent when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to convert the case of the strings.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool wcs_iequals(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = WCHARToUnicode(s1);
    UnicodeString b = WCHARToUnicode(s2);

    return a.caseCompare(b, 0) == 0;
}

/**
 * Check if s1 starts with s2.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool wcs_startswith(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = WCHARToUnicode(s1);
    UnicodeString b = WCHARToUnicode(s2);

    return a.compare(0, b.length(), b) == 0;
}

/**
 * Check if s1 starts with s2 when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to convert the case of the strings.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool wcs_istartswith(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = WCHARToUnicode(s1);
    UnicodeString b = WCHARToUnicode(s2);

    return a.caseCompare(0, b.length(), b, 0) == 0;
}

/**
 * Compare two strings using the collator to determine the sort order.
 * 
 * Both strings are expectes to be in the current locale. The comparison is
 * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
 * if the two strings are the same if the case is discarded. It doesn't effect the
 * sorting in any other way.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	collator	The collator used to determine which string precedes the other.
 * 
 * @return		An integer.
 * @retval		-1	s1 is smaller than s2
 * @retval		0	s1 equals s2.
 * @retval		1	s1 is greater than s2
 */
int wcs_icompare(const wchar_t *s1, const wchar_t *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
	UErrorCode status = U_ZERO_ERROR;
	unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));

	UnicodeString a = WCHARToUnicode(s1);
	UnicodeString b = WCHARToUnicode(s2);

	a.foldCase();
	b.foldCase();

	return ptrCollator->compare(a,b,status);
}

/**
 * Find a string in another string.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale used to perform string collation.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 *
 * @note This function behaves different than strstr in that it returns a
 *       a boolean instead of a pointer to the found substring. This is
 *       because we search on a transformed string. Getting the correct
 *       pointer would involve additional processing while we don't need
 *       the result anyway.
 */
bool wcs_contains(const wchar_t *haystack, const wchar_t *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = WCHARToUnicode(haystack);
    UnicodeString b = WCHARToUnicode(needle);

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Find a string in another string while ignoreing case.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale to use when converting case.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 *
 * @note This function behaves different than strstr in that it returns a
 *       a boolean instead of a pointer to the found substring. This is
 *       because we search on a transformed string. Getting the correct
 *       pointer would involve additional processing while we don't need
 *       the result anyway.
 */
bool wcs_icontains(const wchar_t *haystack, const wchar_t *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = WCHARToUnicode(haystack);
    UnicodeString b = WCHARToUnicode(needle);

    a.foldCase();
    b.foldCase();

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Check if two strings are canonical equivalent.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool u8_equals(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = UTF8ToUnicode(s1);
    UnicodeString b = UTF8ToUnicode(s2);

    return a.compare(b) == 0;
}

/**
 * Check if two strings are canonical equivalent when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale to use when converting case.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool u8_iequals(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = UTF8ToUnicode(s1);
    UnicodeString b = UTF8ToUnicode(s2);

    return a.caseCompare(b, 0) == 0;
}

/**
 * Check if s1 starts with s2.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale used to perform string collation.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool u8_startswith(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = UTF8ToUnicode(s1);
    UnicodeString b = UTF8ToUnicode(s2);

    return a.compare(0, b.length(), b) == 0;
}

/**
 * Check if s1 starts with s2 when ignoring the case.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	locale	The locale to use when converting case.
 * 
 * @return	boolean
 * @retval	true	The strings are canonical equivalent
 * @retval	false	The strings are not canonical equivalent
 */
bool u8_istartswith(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
    UnicodeString a = UTF8ToUnicode(s1);
    UnicodeString b = UTF8ToUnicode(s2);

    return a.caseCompare(0, b.length(), b, 0) == 0;
}

/**
 * Compare two strings using the collator to determine the sort order.
 * 
 * Both strings are expectes to be encoded in UTF-8. The comparison is
 * case insensitive. Effectively this only changes behavior compared to strcmp_unicode
 * if the two strings are the same if the case is discarded. It doesn't effect the
 * sorting in any other way.
 * 
 * @param[in]	s1		The string to compare s2 with.
 * @param[in]	s2		The string to compare s1 with.
 * @param[in]	collator	The collator used to determine which string precedes the other.
 * 
 * @return		An integer.
 * @retval		-1	s1 is smaller than s2
 * @retval		0	s1 equals s2.
 * @retval		1	s1 is greater than s2
 */
int u8_icompare(const char *s1, const char *s2, const ECLocale &locale)
{
	assert(s1);
	assert(s2);
	UErrorCode status = U_ZERO_ERROR;
	unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));

	UnicodeString a = UTF8ToUnicode(s1);
	UnicodeString b = UTF8ToUnicode(s2);
	
	a.foldCase();
	b.foldCase();

	return ptrCollator->compare(a,b,status);
}

/**
 * Find a string in another string.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale used to perform string collation.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 *
 * @note This function behaves different than strstr in that it returns a
 *       a boolean instead of a pointer to the found substring. This is
 *       because we search on a transformed string. Getting the correct
 *       pointer would involve additional processing while we don't need
 *       the result anyway.
 */
bool u8_contains(const char *haystack, const char *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = UTF8ToUnicode(haystack);
    UnicodeString b = UTF8ToUnicode(needle);

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Find a string in another string while ignoreing case.
 *
 * @param[in]	haystack	The string to search in
 * @param[in]	needle		The string to search for.
 * @param[in]	locale		The locale to use when converting case.
 *
 * @return boolean
 * @retval	true	The needle was found
 * @retval	false	The needle wasn't found
 */
bool u8_icontains(const char *haystack, const char *needle, const ECLocale &locale)
{
	assert(haystack);
	assert(needle);
    UnicodeString a = UTF8ToUnicode(haystack);
    UnicodeString b = UTF8ToUnicode(needle);

    a.foldCase();
    b.foldCase();

    return u_strstr(a.getTerminatedBuffer(), b.getTerminatedBuffer());
}

/**
 * Copy at most n characters from the utf8 string src to lpstrDest.
 *
 * Characters are counted as code points decoded by utf8::unchecked::next();
 * copying stops at the first code point with value 0 or after n code points,
 * whichever comes first. With n == 0 nothing is copied and lpstrDest
 * becomes empty.
 *
 * @param[in]	src			The UTF-8 source data to copy
 * @param[in]	n			The maximum amount of characters to copy
 * @param[out]	lpstrDest	The copied data.
 *
 * @return The amount of characters copied.
 *
 * @note NOTE(review): the unchecked decoder is presumably not safe for
 *       malformed or truncated UTF-8 input - confirm that callers only
 *       pass valid data.
 */
unsigned u8_ncpy(const char *src, unsigned n, std::string *lpstrDest)
{
	const char *it = src;
	unsigned len = 0;

	/* Testing the cap before decoding makes n == 0 copy nothing. */
	while (len < n) {
		const char *tmp = it;
		if (utf8::unchecked::next(tmp) == 0)
			break;
		it = tmp;	/* only advance past complete, non-terminator code points */
		++len;
	}
	lpstrDest->assign(src, it);
	return len;
}

/**
 * Returns the length in bytes of the string s when capped to a maximum of
 * max characters.
 *
 * Characters are counted as code points decoded by utf8::unchecked::next();
 * counting stops at the first code point with value 0 or after max code
 * points, whichever comes first. With max == 0 the result is 0.
 *
 * @param[in]	s		The UTF-8 string to process
 * @param[in]	max		The maximum amount of characters for which to return
 * 						the length in bytes.
 *
 * @return	The length in bytes of the capped string.
 *
 * @note NOTE(review): the unchecked decoder is presumably not safe for
 *       malformed or truncated UTF-8 input - confirm that callers only
 *       pass valid data.
 */
unsigned u8_cappedbytes(const char *s, unsigned max)
{
	const char *it = s;
	unsigned len = 0;

	/* Testing the cap before decoding makes max == 0 yield zero bytes. */
	while (len < max) {
		const char *tmp = it;
		if (utf8::unchecked::next(tmp) == 0)
			break;
		it = tmp;	/* only advance past complete, non-terminator code points */
		++len;
	}
	return unsigned(it - s);
}

/**
 * Count the characters in a UTF-8 string.
 *
 * Code points are decoded one after another with utf8::unchecked::next()
 * until a code point with value 0 is read; that terminator is not counted.
 *
 * @param[in]	s	The UTF-8 string to get length of.
 *
 * @return	The length in characters of string s
 */
unsigned u8_len(const char *s)
{
	unsigned count = 0;

	for (; utf8::unchecked::next(s) != 0; ++count)
		;
	return count;
}

/*
 * Mapping between POSIX locale ids, Windows LCIDs and Windows locale names.
 *
 * LocaleIdToLCID() scans this table front to back for a case-insensitive
 * match on lpszLocaleID; LCIDToLocaleId() scans it front to back for a match
 * on ulLCID. Several entries share the same LCID (for instance 1033 is listed
 * for both "en_MH" and "en_US"), so the reverse lookup yields whichever entry
 * comes first in the table. The order of the entries is therefore
 * significant.
 */
static const struct localemap {
	const char *lpszLocaleID;	/**< Posix locale id */
	ULONG ulLCID;				/**< Windows LCID */
	const char *lpszLocaleName;	/**< Windows locale name */
} localeMap[] = {
	{"af",54,"Afrikaans_South Africa"},
	{"af_NA",54,"Afrikaans_South Africa"},
	{"af_ZA",1078,"Afrikaans_South Africa"},
	{"ar",1,"Arabic_Saudi Arabia"},
	{"ar_BH",15361,"Arabic_Bahrain"},
	{"ar_DZ",5121,"Arabic_Algeria"},
	{"ar_EG",3073,"Arabic_Egypt"},
	{"ar_IQ",2049,"Arabic_Iraq"},
	{"ar_JO",11265,"Arabic_Jordan"},
	{"ar_KW",13313,"Arabic_Kuwait"},
	{"ar_LB",12289,"Arabic_Lebanon"},
	{"ar_LY",4097,"Arabic_Libya"},
	{"ar_MA",6145,"Arabic_Morocco"},
	{"ar_OM",8193,"Arabic_Oman"},
	{"ar_QA",16385,"Arabic_Qatar"},
	{"ar_SA",1025,"Arabic_Saudi Arabia"},
	{"ar_SD",1,"Arabic_Saudi Arabia"},
	{"ar_SY",10241,"Arabic_Syria"},
	{"ar_TN",7169,"Arabic_Tunisia"},
	{"ar_YE",9217,"Arabic_Yemen"},
	{"az",44,"Azeri (Latin)_Azerbaijan"},
	{"az_Cyrl_AZ",2092,"Azeri (Cyrillic)_Azerbaijan"},
	{"az_Latn_AZ",1068,"Azeri (Latin)_Azerbaijan"},
	{"be",35,"Belarusian_Belarus"},
	{"be_BY",1059,"Belarusian_Belarus"},
	{"bg",2,"Bulgarian_Bulgaria"},
	{"bg_BG",1026,"Bulgarian_Bulgaria"},
	{"ca",3,"Catalan_Spain"},
	{"ca_ES",1027,"Catalan_Spain"},
	{"cs",5,"Czech_Czech Republic"},
	{"cs_CZ",1029,"Czech_Czech Republic"},
	{"cy",82,"Welsh_United Kingdom"},
	{"cy_GB",1106,"Welsh_United Kingdom"},
	{"da",6,"Danish_Denmark"},
	{"da_DK",1030,"Danish_Denmark"},
	{"de",7,"German_Germany"},
	{"de_AT",3079,"German_Austria"},
	{"de_BE",7,"German_Germany"},
	{"de_CH",2055,"German_Switzerland"},
	{"de_DE",1031,"German_Germany"},
	{"de_LI",5127,"German_Liechtenstein"},
	{"de_LU",4103,"German_Luxembourg"},
	{"el",8,"Greek_Greece"},
	{"el_CY",8,"Greek_Greece"},
	{"el_GR",1032,"Greek_Greece"},
	{"en",9,"English_United States"},
	{"en_AU",3081,"English_Australia"},
	{"en_BE",9,"English_United States"},
	{"en_BW",9,"English_United States"},
	{"en_BZ",10249,"English_Belize"},
	{"en_CA",4105,"English_Canada"},
	{"en_GB",2057,"English_United Kingdom"},
	{"en_HK",9,"English_United States"},
	{"en_IE",6153,"English_Ireland"},
	{"en_JM",8201,"English_Jamaica"},
	{"en_MH",1033,"English_United States"},
	{"en_MT",9,"English_United States"},
	{"en_MU",9,"English_United States"},
	{"en_NA",9,"English_United States"},
	{"en_NZ",5129,"English_New Zealand"},
	{"en_PH",13321,"English_Republic of the Philippines"},
	{"en_PK",9,"English_United States"},
	{"en_TT",11273,"English_Trinidad and Tobago"},
	{"en_US",1033,"English_United States"},
	{"en_VI",9225,"English_Caribbean"},
	{"en_ZA",7177,"English_South Africa"},
	{"en_ZW",12297,"English_Zimbabwe"},
	{"es",10,"Spanish_Spain"},
	{"es_AR",11274,"Spanish_Argentina"},
	{"es_BO",16394,"Spanish_Bolivia"},
	{"es_CL",13322,"Spanish_Chile"},
	{"es_CO",9226,"Spanish_Colombia"},
	{"es_CR",5130,"Spanish_Costa Rica"},
	{"es_DO",7178,"Spanish_Dominican Republic"},
	{"es_EC",12298,"Spanish_Ecuador"},
	{"es_ES",3082,"Spanish_Spain"},
	{"es_GQ",10,"Spanish_Spain"},
	{"es_GT",4106,"Spanish_Guatemala"},
	{"es_HN",18442,"Spanish_Honduras"},
	{"es_MX",2058,"Spanish_Mexico"},
	{"es_NI",19466,"Spanish_Nicaragua"},
	{"es_PA",6154,"Spanish_Panama"},
	{"es_PE",10250,"Spanish_Peru"},
	{"es_PR",20490,"Spanish_Puerto Rico"},
	{"es_PY",15370,"Spanish_Paraguay"},
	{"es_SV",17418,"Spanish_El Salvador"},
	{"es_UY",14346,"Spanish_Uruguay"},
	{"es_VE",8202,"Spanish_Venezuela"},
	{"et",37,"Estonian_Estonia"},
	{"et_EE",1061,"Estonian_Estonia"},
	{"eu",45,"Basque_Spain"},
	{"eu_ES",1069,"Basque_Spain"},
	{"fa",41,"Farsi_Iran"},
	{"fa_IR",1065,"Farsi_Iran"},
	{"fi",11,"Finnish_Finland"},
	{"fi_FI",1035,"Finnish_Finland"},
	{"fil",100,"Filipino_Philippines"},
	{"fil_PH",1124,"Filipino_Philippines"},
	{"fo",56,"Faroese_Faroe Islands"},
	{"fo_FO",1080,"Faroese_Faroe Islands"},
	{"fr",12,"French_France"},
	{"fr_BE",2060,"French_Belgium"},
	{"fr_BL",12,"French_France"},
	{"fr_CA",3084,"French_Canada"},
	{"fr_CF",12,"French_France"},
	{"fr_CH",4108,"French_Switzerland"},
	{"fr_FR",1036,"French_France"},
	{"fr_GN",12,"French_France"},
	{"fr_GP",12,"French_France"},
	{"fr_LU",5132,"French_Luxembourg"},
	{"fr_MC",6156,"French_Principality of Monaco"},
	{"fr_MF",12,"French_France"},
	{"fr_MG",12,"French_France"},
	{"fr_MQ",12,"French_France"},
	{"fr_NE",12,"French_France"},
	{"ga_IE",2108,"Irish_Ireland"},
	{"gl",86,"Galician_Spain"},
	{"gl_ES",1110,"Galician_Spain"},
	{"gu",71,"Gujarati_India"},
	{"gu_IN",1095,"Gujarati_India"},
	{"he",13,"Hebrew_Israel"},
	{"he_IL",1037,"Hebrew_Israel"},
	{"hi",57,"Hindi_India"},
	{"hi_IN",1081,"Hindi_India"},
	{"hr",26,"Croatian_Croatia"},
	{"hr_HR",1050,"Croatian_Croatia"},
	{"hu",14,"Hungarian_Hungary"},
	{"hu_HU",1038,"Hungarian_Hungary"},
	{"hy",43,"Armenian_Armenia"},
	{"hy_AM",1067,"Armenian_Armenia"},
	{"id",33,"Indonesian_Indonesia"},
	{"id_ID",1057,"Indonesian_Indonesia"},
	{"is",15,"Icelandic_Iceland"},
	{"is_IS",1039,"Icelandic_Iceland"},
	{"it",16,"Italian_Italy"},
	{"it_CH",2064,"Italian_Switzerland"},
	{"it_IT",1040,"Italian_Italy"},
	{"ja",17,"Japanese_Japan"},
	{"ja_JP",1041,"Japanese_Japan"},
	{"ka",55,"Georgian_Georgia"},
	{"ka_GE",1079,"Georgian_Georgia"},
	{"kk",63,"Kazakh_Kazakhstan"},
	{"kk_Cyrl",63,"Kazakh_Kazakhstan"},
	{"kk_Cyrl_KZ",63,"Kazakh_Kazakhstan"},
	{"kn",75,"Kannada_India"},
	{"kn_IN",1099,"Kannada_India"},
	{"ko",18,"Korean_Korea"},
	{"ko_KR",1042,"Korean_Korea"},
	{"kok",87,"Konkani_India"},
	{"kok_IN",1111,"Konkani_India"},
	{"lt",39,"Lithuanian_Lithuania"},
	{"lt_LT",1063,"Lithuanian_Lithuania"},
	{"lv",38,"Latvian_Latvia"},
	{"lv_LV",1062,"Latvian_Latvia"},
	{"mk",47,"FYRO Macedonian_Former Yugoslav Republic of Macedonia"},
	{"mk_MK",1071,"FYRO Macedonian_Former Yugoslav Republic of Macedonia"},
	{"mr",78,"Marathi_India"},
	{"mr_IN",1102,"Marathi_India"},
	{"ms",62,"Malay_Malaysia"},
	{"ms_BN",2110,"Malay_Brunei Darussalam"},
	{"ms_MY",1086,"Malay_Malaysia"},
	{"mt",58,"Maltese_Malta"},
	{"mt_MT",1082,"Maltese_Malta"},
	{"nb_NO",1044,"Norwegian_Norway"},
	{"ne",97,"Nepali_Nepal"},
	{"ne_NP",1121,"Nepali_Nepal"},
	{"nl",19,"Dutch_Netherlands"},
	{"nl_BE",2067,"Dutch_Belgium"},
	{"nl_NL",1043,"Dutch_Netherlands"},
	{"nn_NO",2068,"Norwegian (Nynorsk)_Norway"},
	{"pa",70,"Punjabi_India"},
	{"pa_Arab",70,"Punjabi_India"},
	{"pa_Arab_PK",70,"Punjabi_India"},
	{"pa_Guru",70,"Punjabi_India"},
	{"pa_Guru_IN",70,"Punjabi_India"},
	{"pl",21,"Polish_Poland"},
	{"pl_PL",1045,"Polish_Poland"},
	{"ps",99,"Pashto_Afghanistan"},
	{"ps_AF",1123,"Pashto_Afghanistan"},
	{"pt",22,"Portuguese_Brazil"},
	{"pt_BR",1046,"Portuguese_Brazil"},
	{"pt_GW",22,"Portuguese_Brazil"},
	{"pt_MZ",22,"Portuguese_Brazil"},
	{"pt_PT",2070,"Portuguese_Portugal"},
	{"rm",23,"Romansh_Switzerland"},
	{"rm_CH",1047,"Romansh_Switzerland"},
	{"ro",24,"Romanian_Romania"},
	{"ro_MD",24,"Romanian_Romania"},
	{"ro_RO",1048,"Romanian_Romania"},
	{"ru",25,"Russian_Russia"},
	{"ru_MD",25,"Russian_Russia"},
	{"ru_RU",1049,"Russian_Russia"},
	{"ru_UA",25,"Russian_Russia"},
	{"sk",27,"Slovak_Slovakia"},
	{"sk_SK",1051,"Slovak_Slovakia"},
	{"sl",36,"Slovenian_Slovenia"},
	{"sl_SI",1060,"Slovenian_Slovenia"},
	{"sq",28,"Albanian_Albania"},
	{"sq_AL",1052,"Albanian_Albania"},
	{"sr_Cyrl_BA",7194,"Serbian (Cyrillic)_Bosnia and Herzegovina"},
	{"sr_Latn_BA",6170,"Serbian (Latin)_Bosnia and Herzegovina"},
	{"sv",29,"Swedish_Sweden"},
	{"sv_FI",2077,"Swedish_Finland"},
	{"sv_SE",1053,"Swedish_Sweden"},
	{"sw",65,"Swahili_Kenya"},
	{"sw_KE",1089,"Swahili_Kenya"},
	{"sw_TZ",65,"Swahili_Kenya"},
	{"ta",73,"Tamil_India"},
	{"ta_IN",1097,"Tamil_India"},
	{"ta_LK",73,"Tamil_India"},
	{"te",74,"Telugu_India"},
	{"te_IN",1098,"Telugu_India"},
	{"th",30,"Thai_Thailand"},
	{"th_TH",1054,"Thai_Thailand"},
	{"tr",31,"Turkish_Turkey"},
	{"tr_TR",1055,"Turkish_Turkey"},
	{"uk",34,"Ukrainian_Ukraine"},
	{"uk_UA",1058,"Ukrainian_Ukraine"},
	{"ur",32,"Urdu_Islamic Republic of Pakistan"},
	{"ur_PK",1056,"Urdu_Islamic Republic of Pakistan"},
	{"uz",67,"Uzbek (Latin)_Uzbekistan"},
	{"uz_Arab",67,"Uzbek (Latin)_Uzbekistan"},
	{"uz_Arab_AF",67,"Uzbek (Latin)_Uzbekistan"},
	{"uz_Cyrl_UZ",2115,"Uzbek (Cyrillic)_Uzbekistan"},
	{"uz_Latn_UZ",1091,"Uzbek (Latin)_Uzbekistan"},
	{"vi",42,"Vietnamese_Viet Nam"},
	{"vi_VN",1066,"Vietnamese_Viet Nam"},
	{"zh_Hans",4,"Chinese_Taiwan"},
	{"zh_Hans_CN",2052,"Chinese_People's Republic of China"},
	{"zh_Hans_HK",4,"Chinese_Taiwan"},
	{"zh_Hans_MO",4,"Chinese_Taiwan"},
	{"zh_Hans_SG",4100,"Chinese_Singapore"},
	{"zh_Hant_TW",1028,"Chinese_Taiwan"},
	{"zu",53,"Zulu_South Africa"},
	{"zu_ZA",1077,"Zulu_South Africa"},
};

/**
 * Build an ECLocale from a locale name.
 *
 * @param[in]	lpszLocale	Locale name, handed unchanged to Locale::createFromName().
 *
 * @return	The locale produced by Locale::createFromName().
 */
ECLocale createLocaleFromName(const char *lpszLocale)
{
	ECLocale result = Locale::createFromName(lpszLocale);
	return result;
}

/**
 * Look up the Windows LCID that belongs to a POSIX locale id.
 *
 * localeMap is scanned front to back; the locale id is compared
 * case-insensitively and the first matching entry wins.
 *
 * @param[in]	lpszLocaleID	The locale id to look up; must not be NULL.
 * @param[out]	lpulLcid		Receives the LCID on success; must not be NULL.
 *								Left untouched when nothing is found.
 *
 * @retval	erSuccess		A matching entry was found.
 * @retval	KCERR_NOT_FOUND	The locale id is not present in localeMap.
 */
ECRESULT LocaleIdToLCID(const char *lpszLocaleID, ULONG *lpulLcid)
{
	assert(lpszLocaleID != NULL);
	assert(lpulLcid != NULL);

	for (const auto &entry : localeMap) {
		if (strcasecmp(entry.lpszLocaleID, lpszLocaleID) != 0)
			continue;
		*lpulLcid = entry.ulLCID;
		return erSuccess;
	}
	return KCERR_NOT_FOUND;
}

/**
 * Look up the POSIX locale id that belongs to a Windows LCID.
 *
 * localeMap is scanned front to back and the first entry carrying the
 * requested LCID wins, even when later entries carry the same LCID.
 *
 * @param[in]	ulLcid			The LCID to look up.
 * @param[out]	lppszLocaleID	Receives a pointer to the locale id string stored
 *								in localeMap; must not be NULL. Left untouched
 *								when nothing is found.
 *
 * @retval	erSuccess		A matching entry was found.
 * @retval	KCERR_NOT_FOUND	The LCID is not present in localeMap.
 */
ECRESULT LCIDToLocaleId(ULONG ulLcid, const char **lppszLocaleID)
{
	assert(lppszLocaleID != NULL);

	for (const auto &entry : localeMap) {
		if (entry.ulLCID != ulLcid)
			continue;
		*lppszLocaleID = entry.lpszLocaleID;
		return erSuccess;
	}
	return KCERR_NOT_FOUND;
}

/**
 * Create a locale independent blob that can be used to sort
 * strings fast. This is used when a string would be compared
 * multiple times.
 *
 * The string is first truncated to nCap code units (when nCap is larger than
 * 0), then a single leading ' or ( is stripped, and finally a collation key
 * is requested from a Collator created for the given locale.
 *
 * @param[in]	s			The string to compare (taken by value; modified locally).
 * @param[in]	nCap		Base the key on the first nCap characters of s (if larger than 0).
 * @param[in]	locale		The locale used to create the sort key.
 *
 * @returns		ECSortKey object containing the blob. When no Collator can be
 *				created for the locale, the default-constructed key is returned.
 */
static ECSortKey createSortKey(UnicodeString s, int nCap,
    const ECLocale &locale)
{
	/* Honour every positive cap, including a cap of exactly one. */
	if (nCap > 0)
		s.truncate(nCap);

	// Quick workaround for sorting items starting with ' (like From and To) and (
	if (s.startsWith("'") || s.startsWith("("))
		s.remove(0, 1);

	CollationKey key;
	UErrorCode status = U_ZERO_ERROR;
	unique_ptr_Collator ptrCollator(Collator::createInstance(locale, status));

	/* Collator creation can fail; never dereference a null collator. */
	if (U_FAILURE(status) || ptrCollator == nullptr)
		return key;

	ptrCollator->getCollationKey(s, key, status);	// Create a collation key for sorting

	return key;
}

/**
 * Create a locale independant blob that can be used to sort
 * strings fast. This is used when a string would be compared
 * multiple times.
 *
 * @param[in]	s			The string to compare.
 * @param[in]	nCap		Base the key on the first nCap characters of s (if larger than 0).
 * @param[in]	locale		The locale used to create the sort key.
 * @param[out]	lpcbKeys	The size in bytes of the returned key.
 * @param[ou]t	lppKey		The returned key.
 */
static void createSortKeyData(const UnicodeString &s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
{
	unsigned char *lpKey = NULL;

	CollationKey key = createSortKey(s, nCap, locale);

	int32_t 		cbKeyData = 0;
	const uint8_t	*lpKeyData = key.getByteArray(cbKeyData);

	lpKey = new unsigned char[cbKeyData];
	memcpy(lpKey, lpKeyData, cbKeyData);		

	*lpcbKey = cbKeyData;
	*lppKey = lpKey;
}

/**
 * Create a locale independant blob that can be used to sort
 * strings fast. This is used when a string would be compared
 * multiple times.
 *
 * @param[in]	s			The string to compare.
 * @param[in]	nCap		Base the key on the first nCap characters of s (if larger than 0).
 * @param[in]	locale		The locale used to create the sort key.
 * @param[out]	lpcbKeys	The size in bytes of the returned key.
 * @param[ou]t	lppKey		The returned key.
 */
void createSortKeyData(const char *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
{
	assert(s != NULL);
	assert(lpcbKey != NULL);
	assert(lppKey != NULL);
	createSortKeyData(UnicodeString(s), nCap, locale, lpcbKey, lppKey);
}

/**
 * Create a locale independant blob that can be used to sort
 * strings fast. This is used when a string would be compared
 * multiple times.
 *
 * @param[in]	s			The string to compare.
 * @param[in]	locale		The locale used to create the sort key.
 * @param[out]	lpcbKeys	The size in bytes of the returned key.
 * @param[ou]t	lppKey		The returned key.
 */
void createSortKeyData(const wchar_t *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
{
	assert(s != NULL);
	assert(lpcbKey != NULL);
	assert(lppKey != NULL);
	UnicodeString ustring;
	ustring = UTF32ToUnicode((const UChar32*)s);
	createSortKeyData(ustring, nCap, locale, lpcbKey, lppKey);
}

/**
 * Create a locale independant blob that can be used to sort
 * strings fast. This is used when a string would be compared
 * multiple times.
 *
 * @param[in]	s			The string to compare.
 * @param[in]	nCap		Base the key on the first nCap characters of s (if larger than 0).
 * @param[in]	locale		The locale used to create the sort key.
 * @param[out]	lpcbKeys	The size in bytes of the returned key.
 * @param[ou]t	lppKey		The returned key.
 */
void createSortKeyDataFromUTF8(const char *s, int nCap, const ECLocale &locale, unsigned int *lpcbKey, unsigned char **lppKey)
{
	assert(s != NULL);
	assert(lpcbKey != NULL);
	assert(lppKey != NULL);
	createSortKeyData(UTF8ToUnicode(s), nCap, locale, lpcbKey, lppKey);
}

/**
 * Create a binary sort key for a UTF-8 string and return it as an
 * ECSortKey object. This is used when a string would be compared
 * multiple times.
 *
 * The input is converted with UTF8ToUnicode before the key is built.
 *
 * @param[in]	s			The UTF-8 string to build the key for. Must not be NULL.
 * @param[in]	nCap		Base the key on the first nCap code units of the converted string (only applied when larger than 1).
 * @param[in]	locale		The locale used to create the sort key.
 *
 * @returns		The ECSortKey containing the blob.
 */
ECSortKey createSortKeyFromUTF8(const char *s, int nCap, const ECLocale &locale)
{
	assert(s != NULL);

	ECSortKey sortKey = createSortKey(UTF8ToUnicode(s), nCap, locale);
	return sortKey;
}

/**
 * Compare two sort keys previously created with createSortKey.
 * 
 * @param[in]	cbKey1		The size i nbytes of key 1.
 * @param[in]	lpKey1		Key 1.
 * @param[in]	cbKey2		The size i nbytes of key 2.
 * @param[in]	lpKey2		Key 2.
 *
 * @retval	<0	Key1 is smaller than key2
 * @retval	0	Key1 equals key2
 * @retval	>0	Key1 is greater than key2
 */
int compareSortKeys(unsigned int cbKey1, const unsigned char *lpKey1, unsigned int cbKey2, const unsigned char *lpKey2)
{
	assert(!(cbKey1 != 0 && lpKey1 == NULL));
	assert(!(cbKey2 != 0 && lpKey2 == NULL));
	CollationKey ckA(lpKey1, cbKey1);
	CollationKey ckB(lpKey2, cbKey2);

	int cmp = 1;
	UErrorCode status = U_ZERO_ERROR;
	switch (ckA.compareTo(ckB, status)) {
	case UCOL_LESS:		cmp = -1; break;
	case UCOL_EQUAL:	cmp =  0; break;
	case UCOL_GREATER:	cmp =  1; break;
	}
	return cmp;
}

} /* namespace */

/** @} */