Commit 43c389b9 authored by Max Kellermann

added "length" parameter to validUtf8String()

In several places, we create temporary copies of non-null-terminated strings just to pass them to functions like validUtf8String(). By adding a length parameter instead of expecting a null-terminated string, we can save this temporary allocation and avoid heap fragmentation.
parent 92b75767
......@@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to,
char *fs_charset_to_utf8(char *dst, const char *str)
{
char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str);
return (ret && !validUtf8String(ret)) ? NULL : ret;
return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret;
}
char *utf8_to_fs_charset(char *dst, const char *str)
......
......@@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) {
assert(str != NULL);
if (validUtf8String(str))
if (validUtf8String(str, strlen(str)))
return str;
DEBUG("not valid utf8 in tag: %s\n",str);
......
......@@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8)
return (char)(c + utf8[1]);
}
static unsigned int validateUtf8Char(const char *inUtf8Char)
static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length)
{
const unsigned char *utf8Char = (const unsigned char *)inUtf8Char;
assert(length > 0);
if (utf8Char[0] < 0x80)
return 1;
......@@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
t = (t >> 1);
count++;
}
if (count > 5)
if (count > 5 || (size_t)count > length)
return 0;
for (i = 1; i <= count; i++) {
if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
......@@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char)
return 0;
}
int validUtf8String(const char *string)
int validUtf8String(const char *string, size_t length)
{
unsigned int ret;
while (*string) {
ret = validateUtf8Char(string);
while (length > 0) {
ret = validateUtf8Char(string, length);
assert((size_t)ret <= length);
if (0 == ret)
return 0;
string += ret;
length -= ret;
}
return 1;
......@@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8)
size_t len = 0;
while (*utf8) {
count = validateUtf8Char(utf8);
count = validateUtf8Char(utf8, INT_MAX);
if (!count) {
free(ret);
return NULL;
......@@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8)
size_t len = 0;
while (*utf8) {
count = validateUtf8Char(utf8);
count = validateUtf8Char(utf8, INT_MAX);
if (count) {
*(cp++) = utf8_to_latin1_char(utf8);
utf8 += count;
......
......@@ -19,11 +19,13 @@
#ifndef UTF_8_H
#define UTF_8_H
#include <os_compat.h>
char *latin1StrToUtf8Dup(const char *latin1);
char *utf8StrToLatin1Dup(const char *utf8);
int validUtf8String(const char *string);
int validUtf8String(const char *string, size_t length);
char *utf8_to_latin1(char *dest, const char *utf8);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment