Collate.cxx 4.45 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * Copyright (C) 2003-2014 The Music Player Daemon Project
 * http://www.musicpd.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"
#include "Collate.hxx"

#ifdef HAVE_ICU
#include "Error.hxx"
25
#include "util/WritableBuffer.hxx"
26
#include "util/ConstBuffer.hxx"
27 28 29 30 31 32 33 34 35 36 37 38 39 40
#include "util/Error.hxx"
#include "util/Domain.hxx"

#include <unicode/ucol.h>
#include <unicode/ustring.h>
#elif defined(HAVE_GLIB)
#include <glib.h>
#else
#include <algorithm>
#include <ctype.h>
#endif

#include <assert.h>
#include <string.h>
41
#include <strings.h>
42 43 44 45 46

#ifdef HAVE_ICU
/**
 * The ICU collator used by the functions in this file.  It is opened
 * by IcuCollateInit() and closed by IcuCollateFinish(); before
 * initialisation it is nullptr.
 */
static UCollator *collator = nullptr;
#endif

47 48
#ifdef HAVE_ICU

49 50 51 52 53 54
/**
 * Open the ICU collator used by this file.  It must not have been
 * opened already, and #error must not be set yet.
 *
 * @param error receives the ICU error code and message on failure
 * @return true on success, false if ucol_open() failed
 */
bool
IcuCollateInit(Error &error)
{
	assert(collator == nullptr);
	assert(!error.IsDefined());

	UErrorCode status = U_ZERO_ERROR;
	collator = ucol_open("", &status);
	if (collator != nullptr)
		return true;

	/* no collator was returned: report ICU's error to the caller */
	error.Format(icu_domain, int(status),
		     "ucol_open() failed: %s", u_errorName(status));
	return false;
}

/**
 * Close the collator opened by IcuCollateInit().  The collator must
 * have been initialised.
 */
void
IcuCollateFinish()
{
	assert(collator != nullptr);

	ucol_close(collator);

	/* do not leave a dangling pointer behind: this keeps the
	   "collator != nullptr" assertions in this file meaningful
	   after shutdown and allows IcuCollateInit() to be called
	   again */
	collator = nullptr;
}

74 75
/**
 * Convert a null-terminated UTF-8 string to a newly allocated UChar
 * array using u_strFromUTF8().
 *
 * @param src the string to convert; must not be nullptr
 * @return the converted buffer, which the caller must release with
 * delete[], or a null buffer if the conversion failed
 */
static WritableBuffer<UChar>
UCharFromUTF8(const char *src)
{
	assert(src != nullptr);

	/* the capacity equals the number of input bytes (presumably
	   because the UChar form never needs more units than the
	   UTF-8 form has bytes) */
	const size_t n_bytes = strlen(src);
	const size_t capacity = n_bytes;
	UChar *const buffer = new UChar[capacity];

	int32_t n_units;
	UErrorCode status = U_ZERO_ERROR;
	u_strFromUTF8(buffer, capacity, &n_units,
		      src, n_bytes,
		      &status);
	if (!U_FAILURE(status))
		return { buffer, size_t(n_units) };

	/* conversion failed: discard the buffer */
	delete[] buffer;
	return nullptr;
}

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/**
 * Convert a UChar buffer to a newly allocated UTF-8 character array
 * using u_strToUTF8().
 *
 * @param src the buffer to convert; must not be a null buffer
 * @return the converted buffer, which the caller must release with
 * delete[], or a null buffer if the conversion failed
 */
static WritableBuffer<char>
UCharToUTF8(ConstBuffer<UChar> src)
{
	assert(!src.IsNull());

	/* worst-case estimate */
	const size_t capacity = src.size * 4;
	char *const utf8 = new char[capacity];

	int32_t utf8_length;
	UErrorCode status = U_ZERO_ERROR;
	u_strToUTF8(utf8, capacity, &utf8_length, src.data, src.size,
		    &status);
	if (!U_FAILURE(status))
		return { utf8, size_t(utf8_length) };

	/* conversion failed: discard the buffer */
	delete[] utf8;
	return nullptr;
}

118 119 120 121 122 123
#endif

/**
 * Compare two null-terminated strings for sorting.
 *
 * Depending on the build configuration, the comparison is done by
 * the ICU collator, by g_utf8_collate() or by strcasecmp().
 *
 * @param a the first string; must not be nullptr
 * @param b the second string; must not be nullptr
 * @return a negative value, zero or a positive value, as returned by
 * the underlying comparison function
 */
gcc_pure
int
IcuCollate(const char *a, const char *b)
{
#if !CLANG_CHECK_VERSION(3,6)
	/* disabled on clang due to -Wtautological-pointer-compare */
	assert(a != nullptr);
	assert(b != nullptr);
#endif

#ifdef HAVE_ICU
	assert(collator != nullptr);

#if U_ICU_VERSION_MAJOR_NUM >= 50
	/* this ICU version can compare UTF-8 strings directly */
	UErrorCode status = U_ZERO_ERROR;
	return static_cast<int>(ucol_strcollUTF8(collator, a, -1, b, -1,
						 &status));
#else
	/* fall back to ucol_strcoll() */

	const auto lhs = UCharFromUTF8(a);
	const auto rhs = UCharFromUTF8(b);

	int order;
	if (lhs.IsNull() || rhs.IsNull())
		/* at least one conversion failed: compare the
		   original strings with strcasecmp() instead */
		order = strcasecmp(a, b);
	else
		order = static_cast<int>(ucol_strcoll(collator,
						      lhs.data, lhs.size,
						      rhs.data, rhs.size));

	delete[] lhs.data;
	delete[] rhs.data;

	return order;
#endif

#elif defined(HAVE_GLIB)
	return g_utf8_collate(a, b);
#else
	return strcasecmp(a, b);
#endif
}

/**
 * Return a case-folded copy of the null-terminated string #src.
 *
 * Depending on the build configuration, folding is done by ICU's
 * u_strFoldCase(), by GLib's g_utf8_casefold() or by a byte-wise
 * tolower().  In the ICU build, #src is returned unmodified if a
 * conversion or the folding fails, or if the folded string is empty.
 *
 * @param src the string to fold; must not be nullptr
 * @return the folded string
 */
std::string
IcuCaseFold(const char *src)
{
#ifdef HAVE_ICU
	assert(collator != nullptr);
#if !CLANG_CHECK_VERSION(3,6)
	/* disabled on clang due to -Wtautological-pointer-compare */
	assert(src != nullptr);
#endif

	const auto u = UCharFromUTF8(src);
	if (u.IsNull())
		return std::string(src);

	size_t folded_capacity = u.size * 2u;
	UChar *folded = new UChar[folded_capacity];

	UErrorCode error_code = U_ZERO_ERROR;
	int32_t folded_length = u_strFoldCase(folded, folded_capacity,
					      u.data, u.size,
					      U_FOLD_CASE_DEFAULT,
					      &error_code);
	if (error_code == U_BUFFER_OVERFLOW_ERROR && folded_length > 0) {
		/* the folded form did not fit into our estimate; ICU
		   has reported the required length, so retry once
		   with a buffer of exactly that size */
		delete[] folded;
		folded_capacity = size_t(folded_length);
		folded = new UChar[folded_capacity];

		error_code = U_ZERO_ERROR;
		folded_length = u_strFoldCase(folded, folded_capacity,
					      u.data, u.size,
					      U_FOLD_CASE_DEFAULT,
					      &error_code);
	}

	delete[] u.data;

	/* check with U_FAILURE() so that mere warnings (e.g. the
	   result exactly filling the buffer without a terminator) do
	   not discard a valid result; the length is passed on
	   explicitly, no terminator is needed */
	if (folded_length <= 0 || U_FAILURE(error_code)) {
		delete[] folded;
		return std::string(src);
	}

	auto result2 = UCharToUTF8({folded, size_t(folded_length)});
	delete[] folded;
	if (result2.IsNull())
		return std::string(src);

	std::string result(result2.data, result2.size);
	delete[] result2.data;
#elif defined(HAVE_GLIB)
	char *tmp = g_utf8_casefold(src, -1);
	std::string result(tmp);
	g_free(tmp);
#else
	std::string result(src);
	std::transform(result.begin(), result.end(), result.begin(),
		       [](char ch) {
			       /* tolower() requires a value
				  representable as unsigned char;
				  passing a negative char (any
				  non-ASCII UTF-8 byte where char is
				  signed) is undefined behaviour */
			       return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
		       });
#endif
	return result;
}