/*
 * Copyright 2003-2016 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"
#include "Collate.hxx"
22
#include "util/AllocatedString.hxx"
23 24

#ifdef HAVE_ICU
25
#include "Util.hxx"
26
#include "Error.hxx"
27
#include "util/AllocatedArray.hxx"
28
#include "util/ConstBuffer.hxx"
29 30 31 32 33 34 35 36 37
#include "util/Error.hxx"

#include <unicode/ucol.h>
#include <unicode/ustring.h>
#else
#include <algorithm>
#include <ctype.h>
#endif

38 39 40 41 42 43
#ifdef WIN32
#include "Win32.hxx"
#include "util/AllocatedString.hxx"
#include <windows.h>
#endif

44
#include <memory>
45
#include <stdexcept>
46

47 48 49 50 51 52 53
#include <assert.h>
#include <string.h>

#ifdef HAVE_ICU
static UCollator *collator;
#endif

54 55
#ifdef HAVE_ICU

56 57 58 59 60 61
/**
 * Open the global ICU collator used by IcuCollate().
 *
 * Must be called while no collator is open and with an #Error
 * instance that has not been set yet (both are asserted).
 *
 * @param error receives the ICU error code and a description if
 * ucol_open() fails
 * @return true on success, false if the collator could not be opened
 */
bool
IcuCollateInit(Error &error)
{
	assert(collator == nullptr);
	assert(!error.IsDefined());

	UErrorCode status = U_ZERO_ERROR;
	collator = ucol_open("", &status);

	const bool success = collator != nullptr;
	if (!success)
		error.Format(icu_domain, int(status),
			     "ucol_open() failed: %s", u_errorName(status));

	return success;
}

/**
 * Close the global ICU collator opened by IcuCollateInit().
 *
 * The collator must currently be open (asserted).  The global
 * pointer is reset afterwards so it does not dangle and so the
 * "collator == nullptr" precondition of IcuCollateInit() holds again
 * if the library is re-initialized.
 */
void
IcuCollateFinish()
{
	assert(collator != nullptr);

	ucol_close(collator);
	collator = nullptr;
}

#endif

/**
 * Compare two UTF-8 strings for sorting.
 *
 * Depending on the build configuration, the comparison is delegated
 * to the ICU collator, to the Windows CompareStringEx() API, or to
 * strcoll().
 *
 * @param a the first string; must not be nullptr
 * @param b the second string; must not be nullptr
 * @return the comparison result of the selected backend (negative,
 * zero or positive in the usual C convention)
 */
gcc_pure
int
IcuCollate(const char *a, const char *b)
{
#if !CLANG_CHECK_VERSION(3,6)
	/* disabled on clang due to -Wtautological-pointer-compare */
	assert(a != nullptr);
	assert(b != nullptr);
#endif

#ifdef HAVE_ICU
	assert(collator != nullptr);

#if U_ICU_VERSION_MAJOR_NUM >= 50
	/* this ICU version can compare UTF-8 strings directly */
	UErrorCode status = U_ZERO_ERROR;
	const auto order = ucol_strcollUTF8(collator, a, -1, b, -1,
					    &status);
	return (int)order;
#else
	/* fall back to ucol_strcoll(), which needs UChar strings */

	try {
		const auto wide_a = UCharFromUTF8(a);
		const auto wide_b = UCharFromUTF8(b);

		return ucol_strcoll(collator,
				    wide_a.begin(), wide_a.size(),
				    wide_b.begin(), wide_b.size());
	} catch (const std::runtime_error &) {
		/* conversion failed: fall back to plain strcasecmp() */
		return strcasecmp(a, b);
	}
#endif

#elif defined(WIN32)
	AllocatedString<wchar_t> wide_a = nullptr, wide_b = nullptr;

	try {
		wide_a = MultiByteToWideChar(CP_UTF8, a);
	} catch (const std::runtime_error &) {
		/* "a" could not be converted; the result now depends
		   only on whether "b" can be converted */
		try {
			wide_b = MultiByteToWideChar(CP_UTF8, b);
		} catch (const std::runtime_error &) {
			/* neither string is convertible */
			return 0;
		}

		return -1;
	}

	try {
		wide_b = MultiByteToWideChar(CP_UTF8, b);
	} catch (const std::runtime_error &) {
		/* only "b" could not be converted */
		return 1;
	}

	const auto raw = CompareStringEx(LOCALE_NAME_INVARIANT,
					 LINGUISTIC_IGNORECASE,
					 wide_a.c_str(), -1,
					 wide_b.c_str(), -1,
					 nullptr, nullptr, 0);

	/* "To maintain the C runtime convention of comparing
	   strings, the value 2 can be subtracted from a nonzero
	   return value." */
	return raw != 0 ? raw - 2 : raw;
#else
	return strcoll(a, b);
#endif
}

151
/**
 * Transform a UTF-8 string into a form intended for case-insensitive
 * comparison.
 *
 * Depending on the build configuration this uses ICU's
 * u_strFoldCase(), the Windows LCMapStringEx() API, or strxfrm().
 *
 * If the transformation fails at any stage (including a
 * std::runtime_error thrown by one of the conversion helpers), a
 * plain copy of the input is returned instead.
 *
 * @param src the input string; must not be nullptr
 * @return a newly allocated string holding the transformed text, or
 * a duplicate of @p src on failure
 */
AllocatedString<>
IcuCaseFold(const char *src)
try {
#ifdef HAVE_ICU
	assert(collator != nullptr);
#if !CLANG_CHECK_VERSION(3,6)
	/* disabled on clang due to -Wtautological-pointer-compare */
	assert(src != nullptr);
#endif

	const auto u = UCharFromUTF8(src);
	if (u.IsNull())
		return AllocatedString<>::Duplicate(src);

	/* destination buffer with twice the number of input UChars;
	   if that turns out to be too small, u_strFoldCase() reports
	   an error and we return the unfolded input below */
	AllocatedArray<UChar> folded(u.size() * 2u);

	UErrorCode error_code = U_ZERO_ERROR;
	size_t folded_length = u_strFoldCase(folded.begin(), folded.size(),
					     u.begin(), u.size(),
					     U_FOLD_CASE_DEFAULT,
					     &error_code);
	/* use U_FAILURE() instead of comparing with U_ZERO_ERROR:
	   warning codes (e.g. U_STRING_NOT_TERMINATED_WARNING when
	   the result exactly fills the buffer) still come with a
	   valid result and must not discard it */
	if (folded_length == 0 || U_FAILURE(error_code))
		return AllocatedString<>::Duplicate(src);

	folded.SetSize(folded_length);
	return UCharToUTF8({folded.begin(), folded.size()});

#elif defined(WIN32)
	const auto u = MultiByteToWideChar(CP_UTF8, src);

	/* first call: query the required destination size */
	const int size = LCMapStringEx(LOCALE_NAME_INVARIANT,
				       LCMAP_SORTKEY|LINGUISTIC_IGNORECASE,
				       u.c_str(), -1, nullptr, 0,
				       nullptr, nullptr, 0);
	if (size <= 0)
		return AllocatedString<>::Duplicate(src);

	/* second call: perform the mapping into the buffer */
	std::unique_ptr<wchar_t[]> buffer(new wchar_t[size]);
	if (LCMapStringEx(LOCALE_NAME_INVARIANT,
			  LCMAP_SORTKEY|LINGUISTIC_IGNORECASE,
			  u.c_str(), -1, buffer.get(), size,
			  nullptr, nullptr, 0) <= 0)
		return AllocatedString<>::Duplicate(src);

	return WideCharToMultiByte(CP_UTF8, buffer.get());

#else
	/* first attempt with a buffer as large as the input */
	size_t size = strlen(src) + 1;
	std::unique_ptr<char[]> buffer(new char[size]);
	size_t nbytes = strxfrm(buffer.get(), src, size);
	if (nbytes >= size) {
		/* buffer too small - reallocate and try again */
		buffer.reset();
		size = nbytes + 1;
		buffer.reset(new char[size]);
		nbytes = strxfrm(buffer.get(), src, size);
	}

	assert(nbytes < size);
	assert(buffer[nbytes] == 0);

	/* hand ownership of the buffer over to the returned string */
	return AllocatedString<>::Donate(buffer.release());
#endif
} catch (const std::runtime_error &) {
	return AllocatedString<>::Duplicate(src);
}