Commit 5758b068 authored by Thomas Mullaly, committed by Alexandre Julliard

urlmon: Implemented a function which removes dot segments from paths of hierarchical URIs.

parent 300d0e1a
......@@ -2454,6 +2454,211 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "http://gov.uk/test/../test2/././../test3/.././././", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
TRUE,
{
{"http://gov.uk/",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"http://gov.uk/",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/",S_OK,TRUE},
{"/",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"http://gov.uk/test/../test2/././../test3/.././././",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "http://gov.uk/test/test2/../../..", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
TRUE,
{
{"http://gov.uk/",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"http://gov.uk/",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/",S_OK,TRUE},
{"/",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"http://gov.uk/test/test2/../../..",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "http://gov.uk/test/test2/../../.", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
TRUE,
{
{"http://gov.uk/",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"http://gov.uk/",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/",S_OK,TRUE},
{"/",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"http://gov.uk/test/test2/../../.",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "file://c:\\tests\\../tests\\./.\\..\\foo%20bar.mp3", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|Uri_HAS_PATH|
Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"file:///c:/foo%2520bar.mp3",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"file:///c:/foo%2520bar.mp3",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".mp3",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE},
{"/c:/foo%2520bar.mp3",S_OK,TRUE},
{"/c:/foo%2520bar.mp3",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"file://c:\\tests\\../tests\\./.\\..\\foo%20bar.mp3",S_OK,FALSE},
{"file",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_UNKNOWN,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_FILE,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Dot removal happens for unknown scheme types. */
{ "zip://gov.uk/test/test2/../../.", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_HOST|
Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"zip://gov.uk/",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"zip://gov.uk/",S_OK,TRUE},
{"",S_FALSE,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/",S_OK,TRUE},
{"/",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"zip://gov.uk/test/test2/../../.",S_OK,FALSE},
{"zip",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Dot removal doesn't happen if NO_CANONICALIZE is set. */
{ "http://gov.uk/test/test2/../../.", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|
Uri_HAS_HOST|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_PORT|Uri_HAS_SCHEME,
TRUE,
{
{"http://gov.uk/test/test2/../../.",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"http://gov.uk/test/test2/../../.",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/test/test2/../../.",S_OK,TRUE},
{"/test/test2/../../.",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"http://gov.uk/test/test2/../../.",S_OK,FALSE},
{"http",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{80,S_OK,FALSE},
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
/* Dot removal doesn't happen for wildcard scheme types. */
{ "*:gov.uk/test/test2/../../.", 0, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_EXTENSION|
Uri_HAS_HOST|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|
Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"*:gov.uk/test/test2/../../.",S_OK,TRUE},
{"gov.uk",S_OK,FALSE},
{"*:gov.uk/test/test2/../../.",S_OK,TRUE},
{"",S_FALSE,FALSE},
{".",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"gov.uk",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"/test/test2/../../.",S_OK,TRUE},
{"/test/test2/../../.",S_OK,TRUE},
{"",S_FALSE,TRUE},
{"*:gov.uk/test/test2/../../.",S_OK,FALSE},
{"*",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_WILDCARD,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
}
};
......
......@@ -471,6 +471,70 @@ static void find_domain_name(const WCHAR *host, DWORD host_len,
(host+host_len)-(host+*domain_start)));
}
/* Removes the dot segments ("." and "..") from the path component of a
 * hierarchical URI. The removal is performed in place, and the steps are
 * modelled on the "remove_dot_segments" routine of RFC 3986, section 5.2.4.
 *
 * This is a modified version of Qt's QUrl function "removeDotsFromPath".
 *
 * path     - Buffer holding the path. It is rewritten in place; this function
 *            does not write a null terminator.
 * path_len - Number of characters in 'path'.
 *
 * Returns the new length of the path string, which is never larger than
 * 'path_len'.
 */
static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
    WCHAR *out = path;
    const WCHAR *in = out;
    const WCHAR *end = out + path_len;
    DWORD len;

    while(in < end) {
        /* Number of characters that are still unread. The prefix checks
         * below compare against this count instead of computing "end - N",
         * because "end - N" would point before the start of the buffer
         * (undefined behaviour) whenever the path is shorter than N.
         */
        DWORD remaining = end - in;

        /* A.  if the input buffer begins with a prefix of "/./" or "/.",
         *     where "." is a complete path segment, then replace that
         *     prefix with "/" in the input buffer; otherwise,
         */
        if(remaining >= 3 && in[0] == '/' && in[1] == '.' && in[2] == '/') {
            /* Skip "/." and leave the trailing '/' as the next input. */
            in += 2;
            continue;
        } else if(remaining == 2 && in[0] == '/' && in[1] == '.') {
            /* Trailing "/." collapses to a single '/'. */
            *out++ = '/';
            in += 2;
            break;
        }

        /* B.  if the input buffer begins with a prefix of "/../" or "/..",
         *     where ".." is a complete path segment, then replace that
         *     prefix with "/" in the input buffer and remove the last
         *     segment and its preceding "/" (if any) from the output
         *     buffer; otherwise,
         */
        if(remaining >= 4 && in[0] == '/' && in[1] == '.' && in[2] == '.' && in[3] == '/') {
            /* Drop the last output segment together with its leading '/'. */
            while(out > path && *(--out) != '/');

            /* Skip "/.." and leave the trailing '/' as the next input. */
            in += 3;
            continue;
        } else if(remaining == 3 && in[0] == '/' && in[1] == '.' && in[2] == '.') {
            /* Drop the last output segment together with its leading '/'. */
            while(out > path && *(--out) != '/');

            /* 'out' never gets ahead of 'in', so the character at 'out' is
             * either the '/' the scan above stopped on or the still
             * untouched first character of the path. Keep that '/' so the
             * result ends with a slash.
             */
            if(*out == '/')
                ++out;
            in += 3;
            break;
        }

        /* C.  move the first path segment in the input buffer to the end of
         *     the output buffer, including the initial "/" character (if
         *     any) and any subsequent characters up to, but not including,
         *     the next "/" character or the end of the input buffer.
         */
        *out++ = *in++;
        while(in < end && *in != '/')
            *out++ = *in++;
    }

    len = out - path;
    TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
          debugstr_wn(path, len), len);
    return len;
}
/* Computes the location where the elision should occur in the IPv6
* address using the numerical values of each component stored in
* 'values'. If the address shouldn't contain an elision then 'index'
......@@ -2370,6 +2434,20 @@ static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri,
uri->path_len = uri->canon_len - uri->path_start;
/* Removing the dot segments only happens when it's not in
* computeOnly mode and it's not a wildcard scheme.
*/
if(!computeOnly && data->scheme_type != URL_SCHEME_WILDCARD) {
if(!(flags & Uri_CREATE_NO_CANONICALIZE)) {
/* Remove the dot segments (if any) and reset everything to the new
* correct length.
*/
DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len);
uri->canon_len -= uri->path_len-new_len;
uri->path_len = new_len;
}
}
if(!computeOnly)
TRACE("Canonicalized path %s len=%d\n",
debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len),
......@@ -2515,6 +2593,7 @@ static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
if(!uri->canon_uri)
return E_OUTOFMEMORY;
uri->canon_size = len;
if(!canonicalize_scheme(data, uri, flags, FALSE)) {
ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags);
heap_free(uri->canon_uri);
......@@ -2528,6 +2607,21 @@ static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) {
return E_INVALIDARG;
}
/* There's a possibility we didn't use all the space we allocated
* earlier.
*/
if(uri->canon_len < uri->canon_size) {
/* This happens if the URI is hierarchical and dot
* segments were removed from its path.
*/
WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR));
if(!tmp)
return E_OUTOFMEMORY;
uri->canon_uri = tmp;
uri->canon_size = uri->canon_len;
}
uri->canon_uri[uri->canon_len] = '\0';
TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment