Commit 20686f47, authored by Hans Leidekker, committed by Alexandre Julliard

webservices: Add support for encoded UTF-8 characters in WsDecodeUrl.

parent 947fbfee
......@@ -54,8 +54,10 @@ static void test_WsDecodeUrl(void)
'%','2','0','2',0};
static WCHAR url26[] = {'h','t','t','p',':','/','/','h','o','s','t','#','f','r','a','g',
'%','2','0','2',0};
static WCHAR url27[] = {'h','t','t','p',':','/','/','h','o','s','t','/','%','c','3','%','a','b','/',0};
static WCHAR host2[] = {'h','o','s','t',' ','2'};
static WCHAR path2[] = {'/','p','a','t','h',' ','2'};
static WCHAR path3[] = {'/',0xeb,'/'};
static WCHAR query2[] = {'q','u','e','r','y',' ','2'};
static WCHAR frag2[] = {'f','r','a','g',' ','2'};
static const struct
......@@ -103,8 +105,9 @@ static void test_WsDecodeUrl(void)
url22 + 12, 4 },
{ url23, S_OK, WS_URL_HTTP_SCHEME_TYPE, host2, 6, 80 },
{ url24, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, path2, 7 },
{ url25, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, NULL, 0, query2, 7 },
{ url26, S_OK, WS_URL_HTTP_SCHEME_TYPE, url24 + 7, 4, 80, NULL, 0, NULL, 0, NULL, 0, frag2, 6 },
{ url25, S_OK, WS_URL_HTTP_SCHEME_TYPE, url25 + 7, 4, 80, NULL, 0, NULL, 0, query2, 7 },
{ url26, S_OK, WS_URL_HTTP_SCHEME_TYPE, url26 + 7, 4, 80, NULL, 0, NULL, 0, NULL, 0, frag2, 6 },
{ url27, S_OK, WS_URL_HTTP_SCHEME_TYPE, url27 + 7, 4, 80, NULL, 0, path3, 3 },
};
WS_HEAP *heap;
WS_STRING str;
......
......@@ -71,11 +71,36 @@ static USHORT default_port( WS_URL_SCHEME_TYPE scheme )
}
}
/* Convert the first `len` WCHARs of `str` to UTF-8 in a freshly allocated buffer.
 *
 * The byte count reported by the sizing call is stored in *ret_len before the
 * allocation is attempted, so it is set even when NULL is returned.
 * The buffer comes from heap_alloc(); url_decode() releases it with heap_free().
 * This function does not append a terminator itself.
 * Returns NULL if the allocation fails.
 */
static unsigned char *strdup_utf8( const WCHAR *str, ULONG len, ULONG *ret_len )
{
    unsigned char *buf;
    ULONG size;

    /* first pass: ask only for the required size in bytes */
    size = WideCharToMultiByte( CP_UTF8, 0, str, len, NULL, 0, NULL, NULL );
    *ret_len = size;

    buf = heap_alloc( size );
    if (!buf) return NULL;

    /* second pass: perform the actual conversion into the new buffer */
    WideCharToMultiByte( CP_UTF8, 0, str, len, (char *)buf, size, NULL, NULL );
    return buf;
}
/* Combine the two hex digits of a "%XX" escape into the value they encode.
 *
 * c1 is the high nibble and c2 the low nibble; both upper and lower case
 * letters are accepted.  No validation is done here: any character that is
 * not '0'-'9' or 'a'-'f' is treated as if it were an upper case hex letter,
 * so callers are expected to check for hex digits first.
 */
static inline int url_decode_byte( char c1, char c2 )
{
    int hi, lo;

    hi = (c1 >= '0' && c1 <= '9') ? c1 - '0'
       : (c1 >= 'a' && c1 <= 'f') ? c1 - 'a' + 10
       : c1 - 'A' + 10;

    lo = (c2 >= '0' && c2 <= '9') ? c2 - '0'
       : (c2 >= 'a' && c2 <= 'f') ? c2 - 'a' + 10
       : c2 - 'A' + 10;

    return hi * 16 + lo;
}
static WCHAR *url_decode( WCHAR *str, ULONG len, WS_HEAP *heap, ULONG *ret_len )
{
WCHAR *p = str, *q, *ret;
BOOL decode = FALSE;
ULONG i, val;
BOOL decode = FALSE, convert = FALSE;
ULONG i, len_utf8, len_left;
unsigned char *utf8, *r;
*ret_len = len;
for (i = 0; i < len; i++, p++)
......@@ -84,36 +109,62 @@ static WCHAR *url_decode( WCHAR *str, ULONG len, WS_HEAP *heap, ULONG *ret_len )
if (p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
{
decode = TRUE;
if (url_decode_byte( p[1], p[2] ) > 159)
{
convert = TRUE;
break;
}
*ret_len -= 2;
}
}
if (!decode) return str;
if (!(q = ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) ))) return NULL;
p = str;
while (len)
if (!convert)
{
if (len >= 3 && p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
if (!(q = ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) ))) return NULL;
p = str;
while (len)
{
if (p[1] >= '0' && p[1] <= '9') val = (p[1] - '0') * 16;
else if (p[1] >= 'a' && p[1] <= 'f') val = (p[1] - 'a') * 16;
else val = (p[1] - 'A') * 16;
if (p[2] >= '0' && p[2] <= '9') val += p[2] - '0';
else if (p[1] >= 'a' && p[1] <= 'f') val += p[2] - 'a';
else val += p[1] - 'A';
*q++ = val;
p += 3;
len -= 3;
if (len >= 3 && p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
{
*q++ = url_decode_byte( p[1], p[2] );
p += 3;
len -= 3;
}
else
{
*q++ = *p++;
len -= 1;
}
}
else
return ret;
}
if (!(r = utf8 = strdup_utf8( str, len, &len_utf8 ))) return NULL;
len_left = len_utf8;
while (len_left)
{
if (len_left >= 3 && r[0] == '%' && isxdigit( r[1] ) && isxdigit( r[2] ))
{
*q++ = *p++;
len -= 1;
r[0] = url_decode_byte( r[1], r[2] );
len_left -= 3;
memmove( r + 1, r + 3, len_left );
len_utf8 -= 2;
}
else len_left -= 1;
r++;
}
if (!(*ret_len = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, (char *)utf8,
len_utf8, NULL, 0 )))
{
WARN( "invalid UTF-8 sequence\n" );
heap_free( utf8 );
return NULL;
}
if ((ret = ws_alloc( heap, *ret_len * sizeof(WCHAR) )))
MultiByteToWideChar( CP_UTF8, 0, (char *)utf8, len_utf8, ret, *ret_len );
heap_free( utf8 );
return ret;
}
......@@ -276,20 +327,11 @@ static inline ULONG escape_size( unsigned char ch, const char *except )
}
}
/* Convert the first `len` WCHARs of `str` to UTF-8 in a buffer obtained from
 * heap_alloc().
 *
 * *ret_len receives the byte count reported by the sizing call; it is written
 * before the allocation, so it is set even when NULL is returned.
 * Returns NULL if the allocation fails.
 */
static char *strdup_utf8( const WCHAR *str, ULONG len, ULONG *ret_len )
{
    char *utf8;

    /* sizing call: no output buffer, just the number of bytes needed */
    *ret_len = WideCharToMultiByte( CP_UTF8, 0, str, len, NULL, 0, NULL, NULL );

    utf8 = heap_alloc( *ret_len );
    if (utf8 != NULL)
    {
        WideCharToMultiByte( CP_UTF8, 0, str, len, utf8, *ret_len, NULL, NULL );
    }
    return utf8;
}
static HRESULT url_encode_size( const WCHAR *str, ULONG len, const char *except, ULONG *ret_len )
{
ULONG i, len_utf8;
BOOL convert = FALSE;
char *utf8;
unsigned char *utf8;
*ret_len = 0;
for (i = 0; i < len; i++)
......@@ -339,7 +381,7 @@ static HRESULT url_encode( const WCHAR *str, ULONG len, WCHAR *buf, const char *
ULONG i, len_utf8, len_enc;
BOOL convert = FALSE;
WCHAR *p = buf;
char *utf8;
unsigned char *utf8;
*ret_len = 0;
for (i = 0; i < len; i++)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment