Commit 37aabda6, authored by Zebediah Figura, committed by Alexandre Julliard

kernelbase: Reimplement UrlCanonicalize().

parent 80a92c07
/*
* Copyright 2018 Nikolay Sivov
* Copyright 2018 Zhiyi Zhang
* Copyright 2021-2023 Zebediah Figura
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
......@@ -18,7 +19,9 @@
*/
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <wchar.h>
#include "windef.h"
#include "winbase.h"
......@@ -91,6 +94,38 @@ static WCHAR *heap_strdupAtoW(const char *str)
return ret;
}
/* Grow a heap array so that it can hold at least "count" elements.
 *
 * elements: address of the array pointer; updated when the array is reallocated.
 * capacity: address of the current capacity, in elements; updated on growth.
 * count:    number of elements that must fit.
 * size:     size of one element, in bytes.
 *
 * Returns true if the array can hold "count" elements on return. Returns false
 * if the request exceeds the largest representable allocation or if the
 * reallocation fails; in that case *elements and *capacity are not modified. */
static bool array_reserve(void **elements, size_t *capacity, size_t count, size_t size)
{
    /* Keep all capacity arithmetic in size_t: the inputs are size_t, and a
     * narrower type would silently truncate them on 64-bit targets. */
    size_t new_capacity, max_capacity;
    void *new_elements;

    if (count <= *capacity)
        return true;

    /* Largest element count whose byte size still fits in a size_t. */
    max_capacity = ~(SIZE_T)0 / size;
    if (count > max_capacity)
        return false;

    /* Start from at least 4 elements and double until the request fits,
     * stopping before the doubling itself could exceed max_capacity. */
    new_capacity = max(4, *capacity);
    while (new_capacity < count && new_capacity <= max_capacity / 2)
        new_capacity *= 2;
    if (new_capacity < count)
        new_capacity = max_capacity;

    if (!(new_elements = heap_realloc( *elements, new_capacity * size )))
        return false;

    *elements = new_elements;
    *capacity = new_capacity;
    return true;
}
/* Return true if "c" is a forward slash or a backslash.
 *
 * The parameter is a wide character because callers pass elements of wide
 * strings; a narrow "char" parameter would truncate the value, so that any
 * character whose low byte is 0x2f or 0x5c would be mistaken for a slash. */
static bool is_slash( wchar_t c )
{
    return c == '/' || c == '\\';
}
static BOOL is_drive_spec( const WCHAR *str )
{
return isalpha( str[0] ) && str[1] == ':';
......@@ -2770,6 +2805,13 @@ url_schemes[] =
{ URL_SCHEME_RES, L"res"},
};
/* Skip over the leading run of characters that may appear in a URL scheme:
 * ASCII alphanumerics, '+', '-' and '.'.
 *
 * Returns a pointer to the first character that is not part of that run
 * (possibly the terminating null). */
static const WCHAR *parse_scheme( const WCHAR *p )
{
    for (;; ++p)
    {
        const WCHAR c = *p;

        if (c == '+' || c == '-' || c == '.')
            continue;
        /* Anything outside ASCII ends the scheme, even if iswalnum() would
         * classify it as alphanumeric. */
        if (c > 0x7f || !iswalnum( c ))
            break;
    }
    return p;
}
static DWORD get_scheme_code(const WCHAR *scheme, DWORD scheme_len)
{
unsigned int i;
......@@ -3554,348 +3596,666 @@ HRESULT WINAPI UrlCanonicalizeA(const char *src_url, char *canonicalized, DWORD
return hr;
}
HRESULT WINAPI UrlCanonicalizeW(const WCHAR *src_url, WCHAR *canonicalized, DWORD *canonicalized_len, DWORD flags)
static bool scheme_is_opaque( URL_SCHEME scheme )
{
WCHAR *url_copy, *url, *wk2, *mp, *mp2;
DWORD nByteLen, nLen, nWkLen;
const WCHAR *wk1, *root;
DWORD escape_flags;
WCHAR slash = '\0';
HRESULT hr = S_OK;
BOOL is_file_url;
INT state;
switch (scheme)
{
case URL_SCHEME_ABOUT:
case URL_SCHEME_JAVASCRIPT:
case URL_SCHEME_MAILTO:
case URL_SCHEME_SHELL:
case URL_SCHEME_VBSCRIPT:
return true;
TRACE("%s, %p, %p, %#lx\n", wine_dbgstr_w(src_url), canonicalized, canonicalized_len, flags);
default:
return false;
}
}
if (!src_url || !canonicalized || !canonicalized_len || !*canonicalized_len)
return E_INVALIDARG;
static bool scheme_preserves_backslashes( URL_SCHEME scheme )
{
switch (scheme)
{
case URL_SCHEME_FTP:
case URL_SCHEME_INVALID:
case URL_SCHEME_LOCAL:
case URL_SCHEME_MK:
case URL_SCHEME_RES:
case URL_SCHEME_UNKNOWN:
case URL_SCHEME_WAIS:
return true;
if (!*src_url)
default:
return false;
}
}
static bool scheme_uses_hostname( URL_SCHEME scheme )
{
switch (scheme)
{
*canonicalized = 0;
return S_OK;
case URL_SCHEME_ABOUT:
case URL_SCHEME_JAVASCRIPT:
case URL_SCHEME_MAILTO:
case URL_SCHEME_MK:
case URL_SCHEME_SHELL:
case URL_SCHEME_VBSCRIPT:
return false;
default:
return true;
}
}
/* Remove '\t' characters from URL */
nByteLen = (lstrlenW(src_url) + 1) * sizeof(WCHAR); /* length in bytes */
url = HeapAlloc(GetProcessHeap(), 0, nByteLen);
if(!url)
return E_OUTOFMEMORY;
/* Return true if "c" separates path segments for the given scheme.
 *
 * A forward slash always does. A backslash does for every scheme except
 * URL_SCHEME_INVALID and URL_SCHEME_UNKNOWN. */
static bool scheme_char_is_separator( URL_SCHEME scheme, WCHAR c )
{
    switch (c)
    {
    case '/':
        return true;

    case '\\':
        return scheme != URL_SCHEME_INVALID && scheme != URL_SCHEME_UNKNOWN;

    default:
        return false;
    }
}
wk1 = src_url;
wk2 = url;
do
static bool scheme_char_is_hostname_separator( URL_SCHEME scheme, DWORD flags, WCHAR c )
{
switch (c)
{
while(*wk1 == '\t')
wk1++;
*wk2++ = *wk1;
} while (*wk1++);
case 0:
case '/':
return true;
case '\\':
return !scheme_preserves_backslashes( scheme );
case '?':
return scheme != URL_SCHEME_FILE || (flags & (URL_WININET_COMPATIBILITY | URL_FILE_USE_PATHURL));
case '#':
return scheme != URL_SCHEME_FILE;
default:
return false;
}
}
/* Allocate memory for simplified URL (before escaping) */
nByteLen = (wk2-url)*sizeof(WCHAR);
url_copy = heap_alloc(nByteLen + sizeof(L"file:///"));
if (!url_copy)
static bool scheme_char_is_dot_separator( URL_SCHEME scheme, DWORD flags, WCHAR c )
{
switch (c)
{
heap_free(url);
return E_OUTOFMEMORY;
case 0:
case '/':
case '?':
return true;
case '#':
return (scheme != URL_SCHEME_FILE || !(flags & (URL_WININET_COMPATIBILITY | URL_FILE_USE_PATHURL)));
case '\\':
return (scheme != URL_SCHEME_INVALID && scheme != URL_SCHEME_UNKNOWN && scheme != URL_SCHEME_MK);
default:
return false;
}
}
is_file_url = !wcsncmp(url, L"file:", 5);
/* There are essentially two types of behaviour concerning dot simplification,
* not counting opaque schemes:
*
* 1) Simplify dots if and only if the first element is not a single or double
* dot. If a double dot would rewind past the root, ignore it. For example:
*
* http://hostname/a/../../b/. -> http://hostname/b/
* http://hostname/./../../b/. -> http://hostname/./../../b/.
*
* 2) Effectively treat all paths as relative. Always simplify, except if a
* double dot would rewind past the root, in which case emit it verbatim.
* For example:
*
* wine://hostname/a/../../b/. -> wine://hostname/../b/
* wine://hostname/./../../b/. -> wine://hostname/../b/
*
* For unclear reasons, this behaviour also correlates with whether a final
* slash is always emitted after a single or double dot (e.g. if
* URL_DONT_SIMPLIFY is specified). The former type does not emit a slash; the
* latter does.
*/
static bool scheme_is_always_relative( URL_SCHEME scheme, DWORD flags )
{
switch (scheme)
{
case URL_SCHEME_INVALID:
case URL_SCHEME_UNKNOWN:
return true;
if ((nByteLen >= 5*sizeof(WCHAR) && !wcsncmp(url, L"http:", 5)) || is_file_url)
slash = '/';
case URL_SCHEME_FILE:
return flags & (URL_WININET_COMPATIBILITY | URL_FILE_USE_PATHURL);
if ((flags & (URL_FILE_USE_PATHURL | URL_WININET_COMPATIBILITY)) && is_file_url)
slash = '\\';
default:
return false;
}
}
/* Growable buffer of wide characters, filled through append_string(). */
struct string_buffer
{
    WCHAR *string;      /* storage; reallocated as the buffer grows */
    size_t len;         /* number of characters currently stored */
    size_t capacity;    /* number of characters "string" has room for */
};
/* Append "len" characters from "str" to the end of "buffer", growing the
 * buffer as needed.
 *
 * If the buffer cannot be grown, nothing is appended and the buffer is left
 * unchanged, rather than copying past the end of the existing allocation. */
static void append_string( struct string_buffer *buffer, const WCHAR *str, size_t len )
{
    /* Nothing to copy; also avoids handing a still-unallocated buffer to memcpy(). */
    if (!len)
        return;

    if (!array_reserve( (void **)&buffer->string, &buffer->capacity, buffer->len + len, sizeof(WCHAR) ))
        return;

    memcpy( buffer->string + buffer->len, str, len * sizeof(WCHAR) );
    buffer->len += len;
}
/* Append the single character "c" to the end of "buffer". */
static void append_char( struct string_buffer *buffer, WCHAR c )
{
    const WCHAR single[1] = { c };

    append_string( buffer, single, 1 );
}
if (nByteLen >= 4*sizeof(WCHAR) && !wcsncmp(url, L"res:", 4))
/* Choose which slash character to emit into the output.
 *
 * scheme: scheme of the URL being rewritten.
 * flags:  canonicalization flags.
 * src:    the slash found in the source, or 0 if the slash is synthesized.
 * dst:    output written so far.
 *
 * Returns "src" unchanged when it is non-zero and the scheme preserves
 * backslashes. Otherwise returns a backslash for file URLs when either
 * URL_FILE_USE_PATHURL or URL_WININET_COMPATIBILITY is set and the output
 * does not yet contain a '#', and a forward slash in every other case. */
static char get_slash_dir( URL_SCHEME scheme, DWORD flags, char src, const struct string_buffer *dst )
{
    const DWORD file_flags = URL_FILE_USE_PATHURL | URL_WININET_COMPATIBILITY;

    if (src && scheme_preserves_backslashes( scheme ))
        return src;

    if (scheme != URL_SCHEME_FILE || !(flags & file_flags))
        return '/';

    /* Once a '#' has been emitted, file URLs go back to forward slashes. */
    return wmemchr( dst->string, '#', dst->len ) ? '/' : '\\';
}
static void rewrite_url( struct string_buffer *dst, const WCHAR *url, DWORD *flags_ptr )
{
DWORD flags = *flags_ptr;
bool pathurl = (flags & (URL_FILE_USE_PATHURL | URL_WININET_COMPATIBILITY));
bool is_relative = false, has_hostname = false, has_initial_slash = false;
const WCHAR *query = NULL, *hash = NULL;
URL_SCHEME scheme = URL_SCHEME_INVALID;
size_t query_len = 0, hash_len = 0;
const WCHAR *scheme_end, *src_end;
const WCHAR *hostname = NULL;
size_t hostname_len = 0;
const WCHAR *src = url;
size_t root_offset;
/* Determine the scheme. */
scheme_end = parse_scheme( url );
if (*scheme_end == ':' && scheme_end >= url + 2)
{
flags &= ~URL_FILE_USE_PATHURL;
slash = '\0';
size_t scheme_len = scheme_end + 1 - url;
scheme = get_scheme_code( url, scheme_len - 1 );
for (size_t i = 0; i < scheme_len; ++i)
append_char( dst, tolower( *src++ ));
}
else if (url[0] == '\\' && url[1] == '\\')
{
append_string( dst, L"file:", 5 );
if (!pathurl && !(flags & URL_UNESCAPE))
flags |= URL_ESCAPE_UNSAFE | URL_ESCAPE_PERCENT;
scheme = URL_SCHEME_FILE;
/*
* state =
* 0 initial 1,3
* 1 have 2[+] alnum 2,3
* 2 have scheme (found :) 4,6,3
* 3 failed (no location)
* 4 have // 5,3
* 5 have 1[+] alnum 6,3
* 6 have location (found /) save root location
*/
has_hostname = true;
}
wk1 = url;
wk2 = url_copy;
state = 0;
if (is_escaped_drive_spec( url ))
{
append_string( dst, L"file://", 7 );
if (!pathurl && !(flags & URL_UNESCAPE))
flags |= URL_ESCAPE_UNSAFE | URL_ESCAPE_PERCENT;
scheme = URL_SCHEME_FILE;
/* Assume path */
if (url[1] == ':')
hostname_len = 0;
has_hostname = true;
}
else if (scheme == URL_SCHEME_MK)
{
lstrcpyW(wk2, L"file:///");
wk2 += lstrlenW(wk2);
if (flags & (URL_FILE_USE_PATHURL | URL_WININET_COMPATIBILITY))
if (src[0] == '@')
{
slash = '\\';
--wk2;
while (*src && *src != '/')
append_char( dst, *src++ );
if (*src == '/')
append_char( dst, *src++ );
else
append_char( dst, '/' );
if ((src[0] == '.' && scheme_char_is_dot_separator( scheme, flags, src[1] )) ||
(src[0] == '.' && src[1] == '.' && scheme_char_is_dot_separator( scheme, flags, src[2] )))
is_relative = true;
}
else
flags |= URL_ESCAPE_UNSAFE;
state = 5;
is_file_url = TRUE;
}
else if (url[0] == '/')
else if (scheme_uses_hostname( scheme ) && scheme_char_is_separator( scheme, src[0] )
&& scheme_char_is_separator( scheme, src[1] ))
{
state = 5;
is_file_url = TRUE;
append_char( dst, scheme_preserves_backslashes( scheme ) ? src[0] : '/' );
append_char( dst, scheme_preserves_backslashes( scheme ) ? src[1] : '/' );
src += 2;
if (scheme == URL_SCHEME_FILE && is_slash( src[0] ) && is_slash( src[1] ))
{
while (is_slash( *src ))
++src;
}
hostname = src;
while (!scheme_char_is_hostname_separator( scheme, flags, *src ))
++src;
hostname_len = src - hostname;
has_hostname = true;
has_initial_slash = true;
}
else if (scheme_char_is_separator( scheme, src[0] ))
{
has_initial_slash = true;
if (scheme == URL_SCHEME_UNKNOWN || scheme == URL_SCHEME_INVALID)
{
/* Special case: an unknown scheme starting with a single slash
* considers the "root" to be the single slash.
* Most other schemes treat it as an empty path segment instead. */
append_char( dst, *src++ );
if (*src == '\\')
++src;
}
else if (scheme == URL_SCHEME_FILE)
{
src++;
while (*wk1)
append_string( dst, L"//", 2 );
hostname_len = 0;
has_hostname = true;
}
}
else
{
switch (state)
if (scheme == URL_SCHEME_FILE)
{
case 0:
if (!iswalnum(*wk1)) {state = 3; break;}
*wk2++ = *wk1++;
if (!iswalnum(*wk1)) {state = 3; break;}
*wk2++ = *wk1++;
state = 1;
break;
case 1:
*wk2++ = *wk1;
if (*wk1++ == ':') state = 2;
break;
case 2:
*wk2++ = *wk1++;
if (*wk1 != '/') {state = 6; break;}
*wk2++ = *wk1++;
if ((flags & URL_FILE_USE_PATHURL) && nByteLen >= 9*sizeof(WCHAR) && is_file_url
&& !wcsncmp(wk1, L"localhost", 9))
if (is_escaped_drive_spec( src ))
{
append_string( dst, L"//", 2 );
hostname_len = 0;
has_hostname = true;
}
else
{
wk1 += 9;
while (*wk1 == '\\' && (flags & URL_FILE_USE_PATHURL))
wk1++;
if (flags & URL_FILE_USE_PATHURL)
append_string( dst, L"//", 2 );
}
}
}
if (scheme == URL_SCHEME_FILE && (flags & URL_FILE_USE_PATHURL))
flags |= URL_UNESCAPE;
*flags_ptr = flags;
if (has_hostname)
{
if (scheme == URL_SCHEME_FILE)
{
bool is_drive = false;
if (is_slash( *src ))
++src;
if (*wk1 == '/' && (flags & URL_FILE_USE_PATHURL))
wk1++;
else if (is_file_url)
if (hostname_len >= 2 && is_escaped_drive_spec( hostname ))
{
hostname_len = 0;
src = hostname;
is_drive = true;
}
else if (is_escaped_drive_spec( src ))
{
const WCHAR *body = wk1;
is_drive = true;
}
while (*body == '/')
++body;
if (pathurl)
{
if (hostname_len == 9 && !wcsnicmp( hostname, L"localhost", 9 ))
{
hostname_len = 0;
if (is_slash( *src ))
++src;
if (is_escaped_drive_spec( src ))
is_drive = true;
}
if (is_drive_spec( body ))
if (!is_drive)
{
if (!(flags & (URL_WININET_COMPATIBILITY | URL_FILE_USE_PATHURL)))
if (hostname_len)
{
if (slash)
*wk2++ = slash;
else
*wk2++ = '/';
append_string( dst, L"\\\\", 2 );
append_string( dst, hostname, hostname_len );
}
if ((*src && *src != '?') || (flags & URL_WININET_COMPATIBILITY))
append_char( dst, get_slash_dir( scheme, flags, 0, dst ));
}
else
}
else
{
if (hostname_len)
append_string( dst, hostname, hostname_len );
append_char( dst, '/' );
}
if (is_drive)
{
/* Root starts after the first slash when file flags are in use,
* but directly after the drive specification if not. */
if (pathurl)
{
if (flags & URL_WININET_COMPATIBILITY)
while (!scheme_char_is_hostname_separator( scheme, flags, *src ))
append_char( dst, *src++ );
if (is_slash( *src ))
{
if (*wk1 == '/' && *(wk1 + 1) != '/')
{
*wk2++ = '\\';
}
else
{
*wk2++ = '\\';
*wk2++ = '\\';
}
}
else
{
if (*wk1 == '/' && *(wk1+1) != '/')
{
if (slash)
*wk2++ = slash;
else
*wk2++ = '/';
}
append_char( dst, '\\' );
src++;
}
}
wk1 = body;
}
state = 4;
break;
case 3:
nWkLen = lstrlenW(wk1);
memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
mp = wk2;
wk1 += nWkLen;
wk2 += nWkLen;
if (slash)
{
while (mp < wk2)
else
{
if (*mp == '/' || *mp == '\\')
*mp = slash;
mp++;
append_char( dst, *src++ );
append_char( dst, *src++ );
if (is_slash( *src ))
{
append_char( dst, '/' );
src++;
}
}
}
break;
case 4:
if (!iswalnum(*wk1) && (*wk1 != '-') && (*wk1 != '.') && (*wk1 != ':'))
{
state = 3;
break;
}
while (iswalnum(*wk1) || (*wk1 == '-') || (*wk1 == '.') || (*wk1 == ':'))
*wk2++ = *wk1++;
state = 5;
if (!*wk1)
}
else
{
for (size_t i = 0; i < hostname_len; ++i)
{
if (slash)
*wk2++ = slash;
if (scheme == URL_SCHEME_UNKNOWN || scheme == URL_SCHEME_INVALID)
append_char( dst, hostname[i] );
else
*wk2++ = '/';
append_char( dst, tolower( hostname[i] ));
}
break;
case 5:
if (*wk1 != '/' && *wk1 != '\\')
if (*src == '/' || *src == '\\')
{
state = 3;
break;
append_char( dst, scheme_preserves_backslashes( scheme ) ? *src : '/' );
src++;
}
while (*wk1 == '/' || *wk1 == '\\')
else
{
if (slash)
*wk2++ = slash;
else
*wk2++ = *wk1;
wk1++;
append_char( dst, '/' );
}
state = 6;
break;
case 6:
if (flags & URL_DONT_SIMPLIFY)
}
if ((src[0] == '.' && scheme_char_is_dot_separator( scheme, flags, src[1] )) ||
(src[0] == '.' && src[1] == '.' && scheme_char_is_dot_separator( scheme, flags, src[2] )))
{
if (!scheme_is_always_relative( scheme, flags ))
is_relative = true;
}
}
/* root_offset now points to the point past which we will not rewind.
* If there is a hostname, it points to the character after the closing
* slash. */
root_offset = dst->len;
/* Break up the rest of the URL into the body, query, and hash parts. */
src_end = src + wcslen( src );
if (scheme_is_opaque( scheme ))
{
/* +1 for null terminator */
append_string( dst, src, src_end + 1 - src );
return;
}
if (scheme == URL_SCHEME_FILE)
{
if (!pathurl)
{
if (src[0] == '#')
hash = src;
else if (is_slash( src[0] ) && src[1] == '#')
hash = src + 1;
if (src[0] == '?')
query = src;
else if (is_slash( src[0] ) && src[1] == '?')
query = src + 1;
}
else
{
query = wcschr( src, '?' );
}
if (!hash)
{
for (const WCHAR *p = src; p < src_end; ++p)
{
state = 3;
break;
if (!wcsnicmp( p, L".htm#" , 5))
hash = p + 4;
else if (!wcsnicmp( p, L".html#", 6 ))
hash = p + 5;
}
}
}
else
{
query = wcschr( src, '?' );
hash = wcschr( src, '#' );
}
/* Now at root location, cannot back up any more. */
/* "root" will point at the '/' */
if (query)
query_len = ((hash && hash > query) ? hash : src_end) - query;
if (hash)
hash_len = ((query && query > hash) ? query : src_end) - hash;
if (query)
src_end = query;
if (hash && hash < src_end)
src_end = hash;
if (scheme == URL_SCHEME_UNKNOWN && !has_initial_slash)
{
if (!(flags & URL_DONT_SIMPLIFY) && src[0] == '.' && src_end == src + 1)
src++;
flags |= URL_DONT_SIMPLIFY;
}
root = wk2-1;
while (*wk1)
while (src < src_end)
{
bool is_dots = false;
size_t len;
for (len = 0; src + len < src_end && !scheme_char_is_separator( scheme, src[len] ); ++len)
;
if (src[0] == '.' && scheme_char_is_dot_separator( scheme, flags, src[1] ))
{
if (!is_relative)
{
mp = wcschr(wk1, '/');
mp2 = wcschr(wk1, '\\');
if (mp2 && (!mp || mp2 < mp))
mp = mp2;
if (!mp)
if (flags & URL_DONT_SIMPLIFY)
{
is_dots = true;
}
else
{
nWkLen = lstrlenW(wk1);
memcpy(wk2, wk1, (nWkLen + 1) * sizeof(WCHAR));
wk1 += nWkLen;
wk2 += nWkLen;
++src;
if (*src == '/' || *src == '\\')
++src;
continue;
}
nLen = mp - wk1;
if (nLen)
}
}
else if (src[0] == '.' && src[1] == '.' && scheme_char_is_dot_separator( scheme, flags, src[2] ))
{
if (!is_relative)
{
if (flags & URL_DONT_SIMPLIFY)
{
memcpy(wk2, wk1, nLen * sizeof(WCHAR));
wk2 += nLen;
wk1 += nLen;
is_dots = true;
}
if (slash)
*wk2++ = slash;
else
*wk2++ = *wk1;
wk1++;
while (*wk1 == '.')
else if (dst->len == root_offset && scheme_is_always_relative( scheme, flags ))
{
TRACE("found '/.'\n");
if (wk1[1] == '/' || wk1[1] == '\\')
{
/* case of /./ -> skip the ./ */
wk1 += 2;
}
else if (wk1[1] == '.' && (wk1[2] == '/' || wk1[2] == '\\' || wk1[2] == '?'
|| wk1[2] == '#' || !wk1[2]))
{
/* case /../ -> need to backup wk2 */
TRACE("found '/../'\n");
*(wk2-1) = '\0'; /* set end of string */
mp = wcsrchr(root, '/');
mp2 = wcsrchr(root, '\\');
if (mp2 && (!mp || mp2 < mp))
mp = mp2;
if (mp && (mp >= root))
{
/* found valid backup point */
wk2 = mp + 1;
if(wk1[2] != '/' && wk1[2] != '\\')
wk1 += 2;
else
wk1 += 3;
}
else
{
/* did not find point, restore '/' */
*(wk2-1) = slash;
break;
}
}
/* We could also use is_dots here, except that we need to
* update root afterwards. */
append_char( dst, *src++ );
append_char( dst, *src++ );
if (*src == '/' || *src == '\\')
append_char( dst, get_slash_dir( scheme, flags, *src++, dst ));
else
break;
append_char( dst, get_slash_dir( scheme, flags, 0, dst ));
root_offset = dst->len;
continue;
}
else
{
if (dst->len > root_offset)
--dst->len; /* rewind past the last slash */
while (dst->len > root_offset && !scheme_char_is_separator( scheme, dst->string[dst->len - 1] ))
--dst->len;
src += 2;
if (*src == '/' || *src == '\\')
++src;
continue;
}
}
*wk2 = '\0';
break;
default:
FIXME("how did we get here - state=%d\n", state);
heap_free(url_copy);
heap_free(url);
return E_INVALIDARG;
}
*wk2 = '\0';
TRACE("Simplified, orig <%s>, simple <%s>\n", wine_dbgstr_w(src_url), wine_dbgstr_w(url_copy));
if (len)
{
append_string( dst, src, len );
src += len;
}
if (*src == '?' || *src == '#' || !*src)
{
if (scheme == URL_SCHEME_UNKNOWN && !has_initial_slash)
is_dots = false;
if (is_dots && scheme_is_always_relative( scheme, flags ))
append_char( dst, get_slash_dir( scheme, flags, 0, dst ));
}
else /* slash */
{
append_char( dst, get_slash_dir( scheme, flags, *src++, dst ));
}
}
nLen = lstrlenW(url_copy);
while ((nLen > 0) && ((url_copy[nLen-1] <= ' ')))
url_copy[--nLen]=0;
if ((flags & URL_UNESCAPE) ||
((flags & URL_FILE_USE_PATHURL) && nByteLen >= 5*sizeof(WCHAR) && !wcsncmp(url, L"file:", 5)))
/* If the source was non-empty but collapsed to an empty string, output a
* single slash. */
if (!dst->len && src_end != url)
append_char( dst, '/' );
/* UNKNOWN and FILE schemes usually reorder the ? before the #, but others
* emit them in the original order. */
if (query && hash && scheme != URL_SCHEME_FILE && scheme != URL_SCHEME_INVALID && scheme != URL_SCHEME_UNKNOWN)
{
if (query < hash)
{
append_string( dst, query, query_len );
append_string( dst, hash, hash_len );
}
else
{
append_string( dst, hash, hash_len );
append_string( dst, query, query_len );
}
}
else if (!(scheme == URL_SCHEME_FILE && (flags & URL_FILE_USE_PATHURL)))
{
UrlUnescapeW(url_copy, NULL, &nLen, URL_UNESCAPE_INPLACE);
if (query)
append_string( dst, query, query_len );
if (hash)
append_string( dst, hash, hash_len );
}
append_char( dst, 0 );
}
HRESULT WINAPI UrlCanonicalizeW(const WCHAR *src_url, WCHAR *canonicalized, DWORD *canonicalized_len, DWORD flags)
{
struct string_buffer rewritten = {0};
DWORD escape_flags;
HRESULT hr = S_OK;
const WCHAR *src;
WCHAR *url, *dst;
DWORD len;
TRACE("%s, %p, %p, %#lx\n", wine_dbgstr_w(src_url), canonicalized, canonicalized_len, flags);
if (!src_url || !canonicalized || !canonicalized_len || !*canonicalized_len)
return E_INVALIDARG;
if (!*src_url)
{
*canonicalized = 0;
return S_OK;
}
/* PATHURL takes precedence. */
if (flags & URL_FILE_USE_PATHURL)
flags &= ~URL_WININET_COMPATIBILITY;
/* strip initial and final C0 control characters and space */
src = src_url;
while (*src > 0 && *src <= 0x20)
++src;
len = wcslen( src );
while (len && src[len - 1] > 0 && src[len - 1] <= 0x20)
--len;
if (!(url = HeapAlloc( GetProcessHeap(), 0, (len + 1) * sizeof(WCHAR) )))
return E_OUTOFMEMORY;
dst = url;
for (size_t i = 0; i < len; ++i)
{
if (src[i] != '\t' && src[i] != '\n' && src[i] != '\r')
*dst++ = src[i];
}
*dst++ = 0;
rewrite_url( &rewritten, url, &flags );
if (flags & URL_UNESCAPE)
{
len = rewritten.len;
UrlUnescapeW( rewritten.string, NULL, &len, URL_UNESCAPE_INPLACE);
rewritten.len = wcslen( rewritten.string ) + 1;
}
/* URL_ESCAPE_SEGMENT_ONLY seems to be ignored. */
escape_flags = flags & (URL_ESCAPE_UNSAFE | URL_ESCAPE_SPACES_ONLY | URL_ESCAPE_PERCENT |
URL_DONT_ESCAPE_EXTRA_INFO | URL_ESCAPE_SEGMENT_ONLY);
URL_DONT_ESCAPE_EXTRA_INFO);
if (escape_flags)
{
escape_flags &= ~URL_ESCAPE_UNSAFE;
hr = UrlEscapeW(url_copy, canonicalized, canonicalized_len, escape_flags);
hr = UrlEscapeW( rewritten.string, canonicalized, canonicalized_len, escape_flags );
}
else
{
/* No escaping needed, just copy the string */
nLen = lstrlenW(url_copy);
if (nLen < *canonicalized_len)
memcpy(canonicalized, url_copy, (nLen + 1)*sizeof(WCHAR));
if (rewritten.len <= *canonicalized_len)
{
memcpy( canonicalized, rewritten.string, rewritten.len * sizeof(WCHAR) );
*canonicalized_len = rewritten.len - 1;
}
else
{
hr = E_POINTER;
nLen++;
*canonicalized_len = rewritten.len;
}
*canonicalized_len = nLen;
}
heap_free(url_copy);
heap_free(url);
heap_free( rewritten.string );
heap_free( url );
if (hr == S_OK)
TRACE("result %s\n", wine_dbgstr_w(canonicalized));
......@@ -4223,13 +4583,6 @@ HRESULT WINAPI UrlGetPartA(const char *url, char *out, DWORD *out_len, DWORD par
return hr;
}
/* Skip over the leading run of characters that may appear in a URL scheme:
 * ASCII alphanumerics, '+', '-' and '.'.
 *
 * Returns a pointer to the first character that is not part of that run
 * (possibly the terminating null). */
static const WCHAR *parse_scheme( const WCHAR *p )
{
    /* iswalnum() can also accept alphanumerics outside ASCII, which are not
     * valid scheme characters, so reject anything above 0x7f first. */
    while (*p <= 0x7f && (iswalnum( *p ) || *p == '+' || *p == '-' || *p == '.'))
        ++p;
    return p;
}
static const WCHAR *parse_url_element( const WCHAR *url, const WCHAR *separators )
{
const WCHAR *p;
......@@ -4239,11 +4592,6 @@ static const WCHAR *parse_url_element( const WCHAR *url, const WCHAR *separators
return url + wcslen( url );
}
/* Return non-zero if "c" is a forward slash or a backslash.
 *
 * The parameter is a wide character so that elements of wide strings can be
 * passed without truncation; with a narrow "char" parameter, any character
 * whose low byte is 0x2f or 0x5c would be mistaken for a slash. */
static BOOL is_slash( wchar_t c )
{
    return c == '/' || c == '\\';
}
static void parse_url( const WCHAR *url, struct parsed_url *pl )
{
const WCHAR *work;
......
......@@ -772,7 +772,7 @@ static void test_UrlGetPart(void)
}
/* ########################### */
static void check_url_canonicalize(const char *url, DWORD flags, const char *expect, BOOL todo)
static void check_url_canonicalize(const char *url, DWORD flags, const char *expect)
{
char output[INTERNET_MAX_URL_LENGTH];
WCHAR outputW[INTERNET_MAX_URL_LENGTH];
......@@ -788,8 +788,7 @@ static void check_url_canonicalize(const char *url, DWORD flags, const char *exp
ok(hr == E_INVALIDARG, "Got unexpected hr %#lx.\n", hr);
hr = UrlCanonicalizeA(url, output, &size, flags);
ok(hr == S_OK || (!url[0] && hr == S_FALSE) /* Vista+ */, "Got unexpected hr %#lx.\n", hr);
todo_wine_if (todo)
ok(!strcmp(output, expect), "Expected %s, got %s.\n", debugstr_a(expect), debugstr_a(output));
ok(!strcmp(output, expect), "Expected %s, got %s.\n", debugstr_a(expect), debugstr_a(output));
size = INTERNET_MAX_URL_LENGTH;
hr = UrlCanonicalizeW(urlW, NULL, &size, flags);
......@@ -967,12 +966,11 @@ static void test_UrlCanonicalizeA(void)
const char *url;
DWORD flags;
const char *expect;
BOOL todo;
}
tests[] =
{
{"", 0, ""},
{"http://www.winehq.org/tests/../tests/../..", 0, "http://www.winehq.org/", TRUE},
{"http://www.winehq.org/tests/../tests/../..", 0, "http://www.winehq.org/"},
{"http://www.winehq.org/..", 0, "http://www.winehq.org/.."},
{"http://www.winehq.org/tests/tests2/../../tests", 0, "http://www.winehq.org/tests"},
{"http://www.winehq.org/tests/../tests", 0, "http://www.winehq.org/tests"},
......@@ -1066,7 +1064,7 @@ static void test_UrlCanonicalizeA(void)
{"///A/../B", URL_WININET_COMPATIBILITY, "///B"},
{"A", 0, "A"},
{"../A", 0, "../A"},
{"A/../B", 0, "B", TRUE},
{"A/../B", 0, "B"},
{"/uri-res/N2R?urn:sha1:B3K", URL_DONT_ESCAPE_EXTRA_INFO | URL_WININET_COMPATIBILITY /*0x82000000*/, "/uri-res/N2R?urn:sha1:B3K"} /*LimeWire online installer calls this*/,
{"http:www.winehq.org/dir/../index.html", 0, "http:www.winehq.org/index.html"},
{"http://localhost/test.html", URL_FILE_USE_PATHURL, "http://localhost/test.html"},
......@@ -1158,7 +1156,7 @@ static void test_UrlCanonicalizeA(void)
/* test url-modification */
for (i = 0; i < ARRAY_SIZE(tests); i++)
check_url_canonicalize(tests[i].url, tests[i].flags, tests[i].expect, tests[i].todo);
check_url_canonicalize(tests[i].url, tests[i].flags, tests[i].expect);
}
/* ########################### */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment