Commit 0ecec6d7, authored by Thomas Mullaly, committed by Alexandre Julliard

urlmon: Implemented a path parser for hierarchical URIs.

parent e446a7c7
......@@ -2217,6 +2217,34 @@ static const uri_properties uri_tests[] = {
{URL_SCHEME_HTTP,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
},
{ "zip://www.google.com\\test", Uri_CREATE_NO_CANONICALIZE, S_OK, FALSE,
Uri_HAS_ABSOLUTE_URI|Uri_HAS_AUTHORITY|Uri_HAS_DISPLAY_URI|Uri_HAS_DOMAIN|
Uri_HAS_HOST|Uri_HAS_RAW_URI|Uri_HAS_SCHEME_NAME|Uri_HAS_HOST_TYPE|Uri_HAS_SCHEME,
TRUE,
{
{"zip://www.google.com\\test",S_OK,TRUE},
{"www.google.com\\test",S_OK,FALSE},
{"zip://www.google.com\\test",S_OK,TRUE},
{"google.com\\test",S_OK,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"www.google.com\\test",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"",S_FALSE,TRUE},
{"zip://www.google.com\\test",S_OK,FALSE},
{"zip",S_OK,FALSE},
{"",S_FALSE,FALSE},
{"",S_FALSE,FALSE}
},
{
{Uri_HOST_DNS,S_OK,FALSE},
{0,S_FALSE,FALSE},
{URL_SCHEME_UNKNOWN,S_OK,FALSE},
{URLZONE_INVALID,E_NOTIMPL,FALSE}
}
}
};
......@@ -2264,7 +2292,9 @@ static const invalid_uri invalid_uri_tests[] = {
/* Invalid port with IPv4 address. */
{"http://www.winehq.org:1abcd",0,FALSE},
/* Invalid port with IPv6 address. */
{"http://[::ffff]:32xy",0,FALSE}
{"http://[::ffff]:32xy",0,FALSE},
/* Not allowed to have backslashes with NO_CANONICALIZE. */
{"gopher://www.google.com\\test",Uri_CREATE_NO_CANONICALIZE,FALSE}
};
typedef struct _uri_equality {
......
......@@ -113,6 +113,9 @@ typedef struct {
const WCHAR *port;
DWORD port_len;
USHORT port_value;
const WCHAR *path;
DWORD path_len;
} parse_data;
static const CHAR hexDigits[] = "0123456789ABCDEF";
......@@ -257,6 +260,10 @@ static inline BOOL is_hexdigit(WCHAR val) {
(val >= '0' && val <= '9'));
}
/* Tells whether val ends the path component of a URI: either the
 * terminating NUL of the string, or one of '?' / '#' (the characters
 * that introduce a query or a fragment).
 */
static inline BOOL is_path_delim(WCHAR val) {
    const BOOL at_end_of_string = (val == '\0');
    const BOOL starts_query_or_fragment = (val == '?' || val == '#');

    return at_end_of_string || starts_query_or_fragment;
}
/* Computes the size of the given IPv6 address.
* Each h16 component is 16bits, if there is an IPv4 address, it's
* 32bits. If there's an elision it can be 16bits to 128bits, depending
......@@ -1482,6 +1489,70 @@ static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) {
return TRUE;
}
/* Attempts to parse the path information of a hierarchical URI.
 *
 * On entry *ptr points at the first character of the candidate path.
 *
 * On success TRUE is returned, *ptr is left on the character that ended
 * the path (NUL, '#' or '?') and data->path / data->path_len describe the
 * path. data->path is NULL and data->path_len is 0 when the URI has no
 * path at all.
 *
 * On failure FALSE is returned, *ptr is restored to the value it had on
 * entry and data is not modified.
 */
static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) {
    const WCHAR *start = *ptr;
    static const WCHAR slash[] = {'/',0};
    /* Percent-encoding validation and the backslash restriction only apply
     * to known scheme types other than the file scheme. The scheme type
     * doesn't change while the path is scanned, so evaluate this once.
     */
    const BOOL known_non_file_scheme = data->scheme_type != URL_SCHEME_UNKNOWN &&
                                       data->scheme_type != URL_SCHEME_FILE;

    if(is_path_delim(**ptr)) {
        if(data->scheme_type != URL_SCHEME_WILDCARD &&
           !(flags & Uri_CREATE_NO_CANONICALIZE)) {
            /* If the path component is empty, then a '/' is added. */
            data->path = slash;
            data->path_len = 1;
        } else {
            /* Wildcard schemes don't get a '/' attached if their path is
             * empty, and neither does any URI when NO_CANONICALIZE is set.
             * This is the only way a URI ends up without a path, so record
             * it explicitly instead of relying on the caller having zeroed
             * the parse_data beforehand.
             */
            data->path = NULL;
            data->path_len = 0;
        }
    } else {
        while(!is_path_delim(**ptr)) {
            if(**ptr == '%' && known_non_file_scheme) {
                if(!check_pct_encoded(ptr)) {
                    *ptr = start;
                    return FALSE;
                }

                /* NOTE(review): check_pct_encoded is expected to have moved
                 * *ptr past the encoded sequence on success - confirm against
                 * its definition.
                 */
                continue;
            }

            /* Not allowed to have a backslash if NO_CANONICALIZE is set
             * and the scheme is known type (but not a file scheme).
             */
            if(**ptr == '\\' && known_non_file_scheme &&
               (flags & Uri_CREATE_NO_CANONICALIZE)) {
                *ptr = start;
                return FALSE;
            }

            ++(*ptr);
        }

        /* The first character wasn't a delimiter, so at least one character
         * should have been consumed by now. The empty case is kept purely
         * as a guard so a zero length path is never reported with a
         * non-NULL pointer.
         */
        if(*ptr == start) {
            data->path = NULL;
            data->path_len = 0;
        } else {
            data->path = start;
            data->path_len = *ptr - start;
        }
    }

    if(data->path)
        TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags,
            debugstr_wn(data->path, data->path_len), data->path_len);
    else
        TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags);

    return TRUE;
}
/* Determines how the URI should be parsed after the scheme information.
*
* If the scheme is followed by "//", then it is treated as a hierarchical URI
......@@ -1525,11 +1596,15 @@ static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) {
TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags);
data->is_opaque = FALSE;
if(data->scheme_type == URL_SCHEME_FILE)
/* Skip past the "//" after the scheme (if any). */
check_hierarchical(ptr);
/* TODO: Handle hierarchical URI's, parse authority then parse the path. */
if(!parse_authority(ptr, data, flags))
return FALSE;
return TRUE;
return parse_path_hierarchical(ptr, data, flags);
}
}
......@@ -1564,6 +1639,8 @@ static BOOL parse_uri(parse_data *data, DWORD flags) {
if(!parse_hierpart(pptr, data, flags))
return FALSE;
/* TODO: Parse query and fragment (if the URI has one). */
TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags);
return TRUE;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment