Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wine-winehq
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wine
wine-winehq
Commits
37aabda6
Commit
37aabda6
authored
Jan 23, 2024
by
Zebediah Figura
Committed by
Alexandre Julliard
Feb 13, 2024
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kernelbase: Reimplement UrlCanonicalize().
Wine-Bug:
https://bugs.winehq.org/show_bug.cgi?id=23166
parent
80a92c07
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
618 additions
and
272 deletions
+618
-272
path.c
dlls/kernelbase/path.c
+613
-265
url.c
dlls/shlwapi/tests/url.c
+5
-7
No files found.
dlls/kernelbase/path.c
View file @
37aabda6
/*
* Copyright 2018 Nikolay Sivov
* Copyright 2018 Zhiyi Zhang
* Copyright 2021-2023 Zebediah Figura
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
...
...
@@ -18,7 +19,9 @@
*/
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <wchar.h>
#include "windef.h"
#include "winbase.h"
...
...
@@ -91,6 +94,38 @@ static WCHAR *heap_strdupAtoW(const char *str)
return
ret
;
}
static
bool
array_reserve
(
void
**
elements
,
size_t
*
capacity
,
size_t
count
,
size_t
size
)
{
unsigned
int
new_capacity
,
max_capacity
;
void
*
new_elements
;
if
(
count
<=
*
capacity
)
return
true
;
max_capacity
=
~
(
SIZE_T
)
0
/
size
;
if
(
count
>
max_capacity
)
return
false
;
new_capacity
=
max
(
4
,
*
capacity
);
while
(
new_capacity
<
count
&&
new_capacity
<=
max_capacity
/
2
)
new_capacity
*=
2
;
if
(
new_capacity
<
count
)
new_capacity
=
max_capacity
;
if
(
!
(
new_elements
=
heap_realloc
(
*
elements
,
new_capacity
*
size
)))
return
false
;
*
elements
=
new_elements
;
*
capacity
=
new_capacity
;
return
true
;
}
static
bool
is_slash
(
char
c
)
{
return
c
==
'/'
||
c
==
'\\'
;
}
static
BOOL
is_drive_spec
(
const
WCHAR
*
str
)
{
return
isalpha
(
str
[
0
]
)
&&
str
[
1
]
==
':'
;
...
...
@@ -2770,6 +2805,13 @@ url_schemes[] =
{
URL_SCHEME_RES
,
L"res"
},
};
static
const
WCHAR
*
parse_scheme
(
const
WCHAR
*
p
)
{
while
(
*
p
<=
0x7f
&&
(
iswalnum
(
*
p
)
||
*
p
==
'+'
||
*
p
==
'-'
||
*
p
==
'.'
))
++
p
;
return
p
;
}
static
DWORD
get_scheme_code
(
const
WCHAR
*
scheme
,
DWORD
scheme_len
)
{
unsigned
int
i
;
...
...
@@ -3554,348 +3596,666 @@ HRESULT WINAPI UrlCanonicalizeA(const char *src_url, char *canonicalized, DWORD
return
hr
;
}
HRESULT
WINAPI
UrlCanonicalizeW
(
const
WCHAR
*
src_url
,
WCHAR
*
canonicalized
,
DWORD
*
canonicalized_len
,
DWORD
flags
)
static
bool
scheme_is_opaque
(
URL_SCHEME
scheme
)
{
WCHAR
*
url_copy
,
*
url
,
*
wk2
,
*
mp
,
*
mp2
;
DWORD
nByteLen
,
nLen
,
nWkLen
;
const
WCHAR
*
wk1
,
*
root
;
DWORD
escape_flags
;
WCHAR
slash
=
'\0'
;
HRESULT
hr
=
S_OK
;
BOOL
is_file_url
;
INT
stat
e
;
switch
(
scheme
)
{
case
URL_SCHEME_ABOUT
:
case
URL_SCHEME_JAVASCRIPT
:
case
URL_SCHEME_MAILTO
:
case
URL_SCHEME_SHELL
:
case
URL_SCHEME_VBSCRIPT
:
return
tru
e
;
TRACE
(
"%s, %p, %p, %#lx
\n
"
,
wine_dbgstr_w
(
src_url
),
canonicalized
,
canonicalized_len
,
flags
);
default:
return
false
;
}
}
if
(
!
src_url
||
!
canonicalized
||
!
canonicalized_len
||
!*
canonicalized_len
)
return
E_INVALIDARG
;
static
bool
scheme_preserves_backslashes
(
URL_SCHEME
scheme
)
{
switch
(
scheme
)
{
case
URL_SCHEME_FTP
:
case
URL_SCHEME_INVALID
:
case
URL_SCHEME_LOCAL
:
case
URL_SCHEME_MK
:
case
URL_SCHEME_RES
:
case
URL_SCHEME_UNKNOWN
:
case
URL_SCHEME_WAIS
:
return
true
;
if
(
!*
src_url
)
default:
return
false
;
}
}
static
bool
scheme_uses_hostname
(
URL_SCHEME
scheme
)
{
switch
(
scheme
)
{
*
canonicalized
=
0
;
return
S_OK
;
case
URL_SCHEME_ABOUT
:
case
URL_SCHEME_JAVASCRIPT
:
case
URL_SCHEME_MAILTO
:
case
URL_SCHEME_MK
:
case
URL_SCHEME_SHELL
:
case
URL_SCHEME_VBSCRIPT
:
return
false
;
default:
return
true
;
}
}
/* Remove '\t' characters from URL */
nByteLen
=
(
lstrlenW
(
src_url
)
+
1
)
*
sizeof
(
WCHAR
);
/* length in bytes */
url
=
HeapAlloc
(
GetProcessHeap
(),
0
,
nByteLen
);
if
(
!
url
)
return
E_OUTOFMEMORY
;
static
bool
scheme_char_is_separator
(
URL_SCHEME
scheme
,
WCHAR
c
)
{
if
(
c
==
'/'
)
return
true
;
if
(
c
==
'\\'
&&
scheme
!=
URL_SCHEME_INVALID
&&
scheme
!=
URL_SCHEME_UNKNOWN
)
return
true
;
return
false
;
}
wk1
=
src_url
;
wk2
=
url
;
do
static
bool
scheme_char_is_hostname_separator
(
URL_SCHEME
scheme
,
DWORD
flags
,
WCHAR
c
)
{
switch
(
c
)
{
while
(
*
wk1
==
'\t'
)
wk1
++
;
*
wk2
++
=
*
wk1
;
}
while
(
*
wk1
++
);
case
0
:
case
'/'
:
return
true
;
case
'\\'
:
return
!
scheme_preserves_backslashes
(
scheme
);
case
'?'
:
return
scheme
!=
URL_SCHEME_FILE
||
(
flags
&
(
URL_WININET_COMPATIBILITY
|
URL_FILE_USE_PATHURL
));
case
'#'
:
return
scheme
!=
URL_SCHEME_FILE
;
default:
return
false
;
}
}
/* Allocate memory for simplified URL (before escaping) */
nByteLen
=
(
wk2
-
url
)
*
sizeof
(
WCHAR
);
url_copy
=
heap_alloc
(
nByteLen
+
sizeof
(
L"file:///"
));
if
(
!
url_copy
)
static
bool
scheme_char_is_dot_separator
(
URL_SCHEME
scheme
,
DWORD
flags
,
WCHAR
c
)
{
switch
(
c
)
{
heap_free
(
url
);
return
E_OUTOFMEMORY
;
case
0
:
case
'/'
:
case
'?'
:
return
true
;
case
'#'
:
return
(
scheme
!=
URL_SCHEME_FILE
||
!
(
flags
&
(
URL_WININET_COMPATIBILITY
|
URL_FILE_USE_PATHURL
)));
case
'\\'
:
return
(
scheme
!=
URL_SCHEME_INVALID
&&
scheme
!=
URL_SCHEME_UNKNOWN
&&
scheme
!=
URL_SCHEME_MK
);
default:
return
false
;
}
}
is_file_url
=
!
wcsncmp
(
url
,
L"file:"
,
5
);
/* There are essentially two types of behaviour concerning dot simplification,
* not counting opaque schemes:
*
* 1) Simplify dots if and only if the first element is not a single or double
* dot. If a double dot would rewind past the root, ignore it. For example:
*
* http://hostname/a/../../b/. -> http://hostname/b/
* http://hostname/./../../b/. -> http://hostname/./../../b/.
*
* 2) Effectively treat all paths as relative. Always simplify, except if a
* double dot would rewind past the root, in which case emit it verbatim.
* For example:
*
* wine://hostname/a/../../b/. -> wine://hostname/../b/
* wine://hostname/./../../b/. -> wine://hostname/../b/
*
* For unclear reasons, this behaviour also correlates with whether a final
* slash is always emitted after a single or double dot (e.g. if
* URL_DONT_SIMPLIFY is specified). The former type does not emit a slash; the
* latter does.
*/
static
bool
scheme_is_always_relative
(
URL_SCHEME
scheme
,
DWORD
flags
)
{
switch
(
scheme
)
{
case
URL_SCHEME_INVALID
:
case
URL_SCHEME_UNKNOWN
:
return
true
;
if
((
nByteLen
>=
5
*
sizeof
(
WCHAR
)
&&
!
wcsncmp
(
url
,
L"http:"
,
5
))
||
is_file_url
)
slash
=
'/'
;
case
URL_SCHEME_FILE
:
return
flags
&
(
URL_WININET_COMPATIBILITY
|
URL_FILE_USE_PATHURL
)
;
if
((
flags
&
(
URL_FILE_USE_PATHURL
|
URL_WININET_COMPATIBILITY
))
&&
is_file_url
)
slash
=
'\\'
;
default:
return
false
;
}
}
struct
string_buffer
{
WCHAR
*
string
;
size_t
len
,
capacity
;
};
static
void
append_string
(
struct
string_buffer
*
buffer
,
const
WCHAR
*
str
,
size_t
len
)
{
array_reserve
(
(
void
**
)
&
buffer
->
string
,
&
buffer
->
capacity
,
buffer
->
len
+
len
,
sizeof
(
WCHAR
)
);
memcpy
(
buffer
->
string
+
buffer
->
len
,
str
,
len
*
sizeof
(
WCHAR
)
);
buffer
->
len
+=
len
;
}
static
void
append_char
(
struct
string_buffer
*
buffer
,
WCHAR
c
)
{
append_string
(
buffer
,
&
c
,
1
);
}
if
(
nByteLen
>=
4
*
sizeof
(
WCHAR
)
&&
!
wcsncmp
(
url
,
L"res:"
,
4
))
static
char
get_slash_dir
(
URL_SCHEME
scheme
,
DWORD
flags
,
char
src
,
const
struct
string_buffer
*
dst
)
{
if
(
src
&&
scheme_preserves_backslashes
(
scheme
))
return
src
;
if
(
scheme
==
URL_SCHEME_FILE
&&
(
flags
&
(
URL_FILE_USE_PATHURL
|
URL_WININET_COMPATIBILITY
))
&&
!
wmemchr
(
dst
->
string
,
'#'
,
dst
->
len
))
return
'\\'
;
return
'/'
;
}
static
void
rewrite_url
(
struct
string_buffer
*
dst
,
const
WCHAR
*
url
,
DWORD
*
flags_ptr
)
{
DWORD
flags
=
*
flags_ptr
;
bool
pathurl
=
(
flags
&
(
URL_FILE_USE_PATHURL
|
URL_WININET_COMPATIBILITY
));
bool
is_relative
=
false
,
has_hostname
=
false
,
has_initial_slash
=
false
;
const
WCHAR
*
query
=
NULL
,
*
hash
=
NULL
;
URL_SCHEME
scheme
=
URL_SCHEME_INVALID
;
size_t
query_len
=
0
,
hash_len
=
0
;
const
WCHAR
*
scheme_end
,
*
src_end
;
const
WCHAR
*
hostname
=
NULL
;
size_t
hostname_len
=
0
;
const
WCHAR
*
src
=
url
;
size_t
root_offset
;
/* Determine the scheme. */
scheme_end
=
parse_scheme
(
url
);
if
(
*
scheme_end
==
':'
&&
scheme_end
>=
url
+
2
)
{
flags
&=
~
URL_FILE_USE_PATHURL
;
slash
=
'\0'
;
size_t
scheme_len
=
scheme_end
+
1
-
url
;
scheme
=
get_scheme_code
(
url
,
scheme_len
-
1
);
for
(
size_t
i
=
0
;
i
<
scheme_len
;
++
i
)
append_char
(
dst
,
tolower
(
*
src
++
));
}
else
if
(
url
[
0
]
==
'\\'
&&
url
[
1
]
==
'\\'
)
{
append_string
(
dst
,
L"file:"
,
5
);
if
(
!
pathurl
&&
!
(
flags
&
URL_UNESCAPE
))
flags
|=
URL_ESCAPE_UNSAFE
|
URL_ESCAPE_PERCENT
;
scheme
=
URL_SCHEME_FILE
;
/*
* state =
* 0 initial 1,3
* 1 have 2[+] alnum 2,3
* 2 have scheme (found :) 4,6,3
* 3 failed (no location)
* 4 have // 5,3
* 5 have 1[+] alnum 6,3
* 6 have location (found /) save root location
*/
has_hostname
=
true
;
}
wk1
=
url
;
wk2
=
url_copy
;
state
=
0
;
if
(
is_escaped_drive_spec
(
url
))
{
append_string
(
dst
,
L"file://"
,
7
);
if
(
!
pathurl
&&
!
(
flags
&
URL_UNESCAPE
))
flags
|=
URL_ESCAPE_UNSAFE
|
URL_ESCAPE_PERCENT
;
scheme
=
URL_SCHEME_FILE
;
/* Assume path */
if
(
url
[
1
]
==
':'
)
hostname_len
=
0
;
has_hostname
=
true
;
}
else
if
(
scheme
==
URL_SCHEME_MK
)
{
lstrcpyW
(
wk2
,
L"file:///"
);
wk2
+=
lstrlenW
(
wk2
);
if
(
flags
&
(
URL_FILE_USE_PATHURL
|
URL_WININET_COMPATIBILITY
))
if
(
src
[
0
]
==
'@'
)
{
slash
=
'\\'
;
--
wk2
;
while
(
*
src
&&
*
src
!=
'/'
)
append_char
(
dst
,
*
src
++
);
if
(
*
src
==
'/'
)
append_char
(
dst
,
*
src
++
);
else
append_char
(
dst
,
'/'
);
if
((
src
[
0
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
1
]
))
||
(
src
[
0
]
==
'.'
&&
src
[
1
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
2
]
)))
is_relative
=
true
;
}
else
flags
|=
URL_ESCAPE_UNSAFE
;
state
=
5
;
is_file_url
=
TRUE
;
}
else
if
(
url
[
0
]
==
'/'
)
else
if
(
scheme_uses_hostname
(
scheme
)
&&
scheme_char_is_separator
(
scheme
,
src
[
0
]
)
&&
scheme_char_is_separator
(
scheme
,
src
[
1
]
))
{
state
=
5
;
is_file_url
=
TRUE
;
append_char
(
dst
,
scheme_preserves_backslashes
(
scheme
)
?
src
[
0
]
:
'/'
);
append_char
(
dst
,
scheme_preserves_backslashes
(
scheme
)
?
src
[
1
]
:
'/'
);
src
+=
2
;
if
(
scheme
==
URL_SCHEME_FILE
&&
is_slash
(
src
[
0
]
)
&&
is_slash
(
src
[
1
]
))
{
while
(
is_slash
(
*
src
))
++
src
;
}
hostname
=
src
;
while
(
!
scheme_char_is_hostname_separator
(
scheme
,
flags
,
*
src
))
++
src
;
hostname_len
=
src
-
hostname
;
has_hostname
=
true
;
has_initial_slash
=
true
;
}
else
if
(
scheme_char_is_separator
(
scheme
,
src
[
0
]
))
{
has_initial_slash
=
true
;
if
(
scheme
==
URL_SCHEME_UNKNOWN
||
scheme
==
URL_SCHEME_INVALID
)
{
/* Special case: an unknown scheme starting with a single slash
* considers the "root" to be the single slash.
* Most other schemes treat it as an empty path segment instead. */
append_char
(
dst
,
*
src
++
);
if
(
*
src
==
'\\'
)
++
src
;
}
else
if
(
scheme
==
URL_SCHEME_FILE
)
{
src
++
;
while
(
*
wk1
)
append_string
(
dst
,
L"//"
,
2
);
hostname_len
=
0
;
has_hostname
=
true
;
}
}
else
{
switch
(
state
)
if
(
scheme
==
URL_SCHEME_FILE
)
{
case
0
:
if
(
!
iswalnum
(
*
wk1
))
{
state
=
3
;
break
;}
*
wk2
++
=
*
wk1
++
;
if
(
!
iswalnum
(
*
wk1
))
{
state
=
3
;
break
;}
*
wk2
++
=
*
wk1
++
;
state
=
1
;
break
;
case
1
:
*
wk2
++
=
*
wk1
;
if
(
*
wk1
++
==
':'
)
state
=
2
;
break
;
case
2
:
*
wk2
++
=
*
wk1
++
;
if
(
*
wk1
!=
'/'
)
{
state
=
6
;
break
;}
*
wk2
++
=
*
wk1
++
;
if
((
flags
&
URL_FILE_USE_PATHURL
)
&&
nByteLen
>=
9
*
sizeof
(
WCHAR
)
&&
is_file_url
&&
!
wcsncmp
(
wk1
,
L"localhost"
,
9
))
if
(
is_escaped_drive_spec
(
src
))
{
append_string
(
dst
,
L"//"
,
2
);
hostname_len
=
0
;
has_hostname
=
true
;
}
else
{
wk1
+=
9
;
while
(
*
wk1
==
'\\'
&&
(
flags
&
URL_FILE_USE_PATHURL
))
wk1
++
;
if
(
flags
&
URL_FILE_USE_PATHURL
)
append_string
(
dst
,
L"//"
,
2
);
}
}
}
if
(
scheme
==
URL_SCHEME_FILE
&&
(
flags
&
URL_FILE_USE_PATHURL
))
flags
|=
URL_UNESCAPE
;
*
flags_ptr
=
flags
;
if
(
has_hostname
)
{
if
(
scheme
==
URL_SCHEME_FILE
)
{
bool
is_drive
=
false
;
if
(
is_slash
(
*
src
))
++
src
;
if
(
*
wk1
==
'/'
&&
(
flags
&
URL_FILE_USE_PATHURL
))
wk1
++
;
else
if
(
is_file_url
)
if
(
hostname_len
>=
2
&&
is_escaped_drive_spec
(
hostname
))
{
hostname_len
=
0
;
src
=
hostname
;
is_drive
=
true
;
}
else
if
(
is_escaped_drive_spec
(
src
))
{
const
WCHAR
*
body
=
wk1
;
is_drive
=
true
;
}
while
(
*
body
==
'/'
)
++
body
;
if
(
pathurl
)
{
if
(
hostname_len
==
9
&&
!
wcsnicmp
(
hostname
,
L"localhost"
,
9
))
{
hostname_len
=
0
;
if
(
is_slash
(
*
src
))
++
src
;
if
(
is_escaped_drive_spec
(
src
))
is_drive
=
true
;
}
if
(
is_drive_spec
(
body
)
)
if
(
!
is_drive
)
{
if
(
!
(
flags
&
(
URL_WININET_COMPATIBILITY
|
URL_FILE_USE_PATHURL
))
)
if
(
hostname_len
)
{
if
(
slash
)
*
wk2
++
=
slash
;
else
*
wk2
++
=
'/'
;
append_string
(
dst
,
L"
\\\\
"
,
2
);
append_string
(
dst
,
hostname
,
hostname_len
);
}
if
((
*
src
&&
*
src
!=
'?'
)
||
(
flags
&
URL_WININET_COMPATIBILITY
))
append_char
(
dst
,
get_slash_dir
(
scheme
,
flags
,
0
,
dst
));
}
else
}
else
{
if
(
hostname_len
)
append_string
(
dst
,
hostname
,
hostname_len
);
append_char
(
dst
,
'/'
);
}
if
(
is_drive
)
{
/* Root starts after the first slash when file flags are in use,
* but directly after the drive specification if not. */
if
(
pathurl
)
{
if
(
flags
&
URL_WININET_COMPATIBILITY
)
while
(
!
scheme_char_is_hostname_separator
(
scheme
,
flags
,
*
src
))
append_char
(
dst
,
*
src
++
);
if
(
is_slash
(
*
src
))
{
if
(
*
wk1
==
'/'
&&
*
(
wk1
+
1
)
!=
'/'
)
{
*
wk2
++
=
'\\'
;
}
else
{
*
wk2
++
=
'\\'
;
*
wk2
++
=
'\\'
;
}
}
else
{
if
(
*
wk1
==
'/'
&&
*
(
wk1
+
1
)
!=
'/'
)
{
if
(
slash
)
*
wk2
++
=
slash
;
else
*
wk2
++
=
'/'
;
}
append_char
(
dst
,
'\\'
);
src
++
;
}
}
wk1
=
body
;
}
state
=
4
;
break
;
case
3
:
nWkLen
=
lstrlenW
(
wk1
);
memcpy
(
wk2
,
wk1
,
(
nWkLen
+
1
)
*
sizeof
(
WCHAR
));
mp
=
wk2
;
wk1
+=
nWkLen
;
wk2
+=
nWkLen
;
if
(
slash
)
{
while
(
mp
<
wk2
)
else
{
if
(
*
mp
==
'/'
||
*
mp
==
'\\'
)
*
mp
=
slash
;
mp
++
;
append_char
(
dst
,
*
src
++
);
append_char
(
dst
,
*
src
++
);
if
(
is_slash
(
*
src
))
{
append_char
(
dst
,
'/'
);
src
++
;
}
}
}
break
;
case
4
:
if
(
!
iswalnum
(
*
wk1
)
&&
(
*
wk1
!=
'-'
)
&&
(
*
wk1
!=
'.'
)
&&
(
*
wk1
!=
':'
))
{
state
=
3
;
break
;
}
while
(
iswalnum
(
*
wk1
)
||
(
*
wk1
==
'-'
)
||
(
*
wk1
==
'.'
)
||
(
*
wk1
==
':'
))
*
wk2
++
=
*
wk1
++
;
state
=
5
;
if
(
!*
wk1
)
}
else
{
for
(
size_t
i
=
0
;
i
<
hostname_len
;
++
i
)
{
if
(
s
lash
)
*
wk2
++
=
slash
;
if
(
s
cheme
==
URL_SCHEME_UNKNOWN
||
scheme
==
URL_SCHEME_INVALID
)
append_char
(
dst
,
hostname
[
i
]
)
;
else
*
wk2
++
=
'/'
;
append_char
(
dst
,
tolower
(
hostname
[
i
]
))
;
}
break
;
case
5
:
if
(
*
wk1
!=
'/'
&&
*
wk1
!=
'\\'
)
if
(
*
src
==
'/'
||
*
src
==
'\\'
)
{
state
=
3
;
break
;
append_char
(
dst
,
scheme_preserves_backslashes
(
scheme
)
?
*
src
:
'/'
)
;
src
++
;
}
while
(
*
wk1
==
'/'
||
*
wk1
==
'\\'
)
else
{
if
(
slash
)
*
wk2
++
=
slash
;
else
*
wk2
++
=
*
wk1
;
wk1
++
;
append_char
(
dst
,
'/'
);
}
state
=
6
;
break
;
case
6
:
if
(
flags
&
URL_DONT_SIMPLIFY
)
}
if
((
src
[
0
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
1
]
))
||
(
src
[
0
]
==
'.'
&&
src
[
1
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
2
]
)))
{
if
(
!
scheme_is_always_relative
(
scheme
,
flags
))
is_relative
=
true
;
}
}
/* root_offset now points to the point past which we will not rewind.
* If there is a hostname, it points to the character after the closing
* slash. */
root_offset
=
dst
->
len
;
/* Break up the rest of the URL into the body, query, and hash parts. */
src_end
=
src
+
wcslen
(
src
);
if
(
scheme_is_opaque
(
scheme
))
{
/* +1 for null terminator */
append_string
(
dst
,
src
,
src_end
+
1
-
src
);
return
;
}
if
(
scheme
==
URL_SCHEME_FILE
)
{
if
(
!
pathurl
)
{
if
(
src
[
0
]
==
'#'
)
hash
=
src
;
else
if
(
is_slash
(
src
[
0
]
)
&&
src
[
1
]
==
'#'
)
hash
=
src
+
1
;
if
(
src
[
0
]
==
'?'
)
query
=
src
;
else
if
(
is_slash
(
src
[
0
]
)
&&
src
[
1
]
==
'?'
)
query
=
src
+
1
;
}
else
{
query
=
wcschr
(
src
,
'?'
);
}
if
(
!
hash
)
{
for
(
const
WCHAR
*
p
=
src
;
p
<
src_end
;
++
p
)
{
state
=
3
;
break
;
if
(
!
wcsnicmp
(
p
,
L".htm#"
,
5
))
hash
=
p
+
4
;
else
if
(
!
wcsnicmp
(
p
,
L".html#"
,
6
))
hash
=
p
+
5
;
}
}
}
else
{
query
=
wcschr
(
src
,
'?'
);
hash
=
wcschr
(
src
,
'#'
);
}
/* Now at root location, cannot back up any more. */
/* "root" will point at the '/' */
if
(
query
)
query_len
=
((
hash
&&
hash
>
query
)
?
hash
:
src_end
)
-
query
;
if
(
hash
)
hash_len
=
((
query
&&
query
>
hash
)
?
query
:
src_end
)
-
hash
;
if
(
query
)
src_end
=
query
;
if
(
hash
&&
hash
<
src_end
)
src_end
=
hash
;
if
(
scheme
==
URL_SCHEME_UNKNOWN
&&
!
has_initial_slash
)
{
if
(
!
(
flags
&
URL_DONT_SIMPLIFY
)
&&
src
[
0
]
==
'.'
&&
src_end
==
src
+
1
)
src
++
;
flags
|=
URL_DONT_SIMPLIFY
;
}
root
=
wk2
-
1
;
while
(
*
wk1
)
while
(
src
<
src_end
)
{
bool
is_dots
=
false
;
size_t
len
;
for
(
len
=
0
;
src
+
len
<
src_end
&&
!
scheme_char_is_separator
(
scheme
,
src
[
len
]
);
++
len
)
;
if
(
src
[
0
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
1
]
))
{
if
(
!
is_relative
)
{
mp
=
wcschr
(
wk1
,
'/'
);
mp2
=
wcschr
(
wk1
,
'\\'
);
if
(
mp2
&&
(
!
mp
||
mp2
<
mp
))
mp
=
mp2
;
if
(
!
mp
)
if
(
flags
&
URL_DONT_SIMPLIFY
)
{
is_dots
=
true
;
}
else
{
nWkLen
=
lstrlenW
(
wk1
);
memcpy
(
wk2
,
wk1
,
(
nWkLen
+
1
)
*
sizeof
(
WCHAR
));
wk1
+=
nWkLen
;
wk2
+=
nWkLen
;
++
src
;
if
(
*
src
==
'/'
||
*
src
==
'\\'
)
++
src
;
continue
;
}
nLen
=
mp
-
wk1
;
if
(
nLen
)
}
}
else
if
(
src
[
0
]
==
'.'
&&
src
[
1
]
==
'.'
&&
scheme_char_is_dot_separator
(
scheme
,
flags
,
src
[
2
]
))
{
if
(
!
is_relative
)
{
if
(
flags
&
URL_DONT_SIMPLIFY
)
{
memcpy
(
wk2
,
wk1
,
nLen
*
sizeof
(
WCHAR
));
wk2
+=
nLen
;
wk1
+=
nLen
;
is_dots
=
true
;
}
if
(
slash
)
*
wk2
++
=
slash
;
else
*
wk2
++
=
*
wk1
;
wk1
++
;
while
(
*
wk1
==
'.'
)
else
if
(
dst
->
len
==
root_offset
&&
scheme_is_always_relative
(
scheme
,
flags
))
{
TRACE
(
"found '/.'
\n
"
);
if
(
wk1
[
1
]
==
'/'
||
wk1
[
1
]
==
'\\'
)
{
/* case of /./ -> skip the ./ */
wk1
+=
2
;
}
else
if
(
wk1
[
1
]
==
'.'
&&
(
wk1
[
2
]
==
'/'
||
wk1
[
2
]
==
'\\'
||
wk1
[
2
]
==
'?'
||
wk1
[
2
]
==
'#'
||
!
wk1
[
2
]))
{
/* case /../ -> need to backup wk2 */
TRACE
(
"found '/../'
\n
"
);
*
(
wk2
-
1
)
=
'\0'
;
/* set end of string */
mp
=
wcsrchr
(
root
,
'/'
);
mp2
=
wcsrchr
(
root
,
'\\'
);
if
(
mp2
&&
(
!
mp
||
mp2
<
mp
))
mp
=
mp2
;
if
(
mp
&&
(
mp
>=
root
))
{
/* found valid backup point */
wk2
=
mp
+
1
;
if
(
wk1
[
2
]
!=
'/'
&&
wk1
[
2
]
!=
'\\'
)
wk1
+=
2
;
else
wk1
+=
3
;
}
else
{
/* did not find point, restore '/' */
*
(
wk2
-
1
)
=
slash
;
break
;
}
}
/* We could also use is_dots here, except that we need to
* update root afterwards. */
append_char
(
dst
,
*
src
++
);
append_char
(
dst
,
*
src
++
);
if
(
*
src
==
'/'
||
*
src
==
'\\'
)
append_char
(
dst
,
get_slash_dir
(
scheme
,
flags
,
*
src
++
,
dst
));
else
break
;
append_char
(
dst
,
get_slash_dir
(
scheme
,
flags
,
0
,
dst
));
root_offset
=
dst
->
len
;
continue
;
}
else
{
if
(
dst
->
len
>
root_offset
)
--
dst
->
len
;
/* rewind past the last slash */
while
(
dst
->
len
>
root_offset
&&
!
scheme_char_is_separator
(
scheme
,
dst
->
string
[
dst
->
len
-
1
]
))
--
dst
->
len
;
src
+=
2
;
if
(
*
src
==
'/'
||
*
src
==
'\\'
)
++
src
;
continue
;
}
}
*
wk2
=
'\0'
;
break
;
default:
FIXME
(
"how did we get here - state=%d
\n
"
,
state
);
heap_free
(
url_copy
);
heap_free
(
url
);
return
E_INVALIDARG
;
}
*
wk2
=
'\0'
;
TRACE
(
"Simplified, orig <%s>, simple <%s>
\n
"
,
wine_dbgstr_w
(
src_url
),
wine_dbgstr_w
(
url_copy
));
if
(
len
)
{
append_string
(
dst
,
src
,
len
);
src
+=
len
;
}
if
(
*
src
==
'?'
||
*
src
==
'#'
||
!*
src
)
{
if
(
scheme
==
URL_SCHEME_UNKNOWN
&&
!
has_initial_slash
)
is_dots
=
false
;
if
(
is_dots
&&
scheme_is_always_relative
(
scheme
,
flags
))
append_char
(
dst
,
get_slash_dir
(
scheme
,
flags
,
0
,
dst
));
}
else
/* slash */
{
append_char
(
dst
,
get_slash_dir
(
scheme
,
flags
,
*
src
++
,
dst
));
}
}
nLen
=
lstrlenW
(
url_copy
);
while
((
nLen
>
0
)
&&
((
url_copy
[
nLen
-
1
]
<=
' '
)))
url_copy
[
--
nLen
]
=
0
;
if
((
flags
&
URL_UNESCAPE
)
||
((
flags
&
URL_FILE_USE_PATHURL
)
&&
nByteLen
>=
5
*
sizeof
(
WCHAR
)
&&
!
wcsncmp
(
url
,
L"file:"
,
5
)))
/* If the source was non-empty but collapsed to an empty string, output a
* single slash. */
if
(
!
dst
->
len
&&
src_end
!=
url
)
append_char
(
dst
,
'/'
);
/* UNKNOWN and FILE schemes usually reorder the ? before the #, but others
* emit them in the original order. */
if
(
query
&&
hash
&&
scheme
!=
URL_SCHEME_FILE
&&
scheme
!=
URL_SCHEME_INVALID
&&
scheme
!=
URL_SCHEME_UNKNOWN
)
{
if
(
query
<
hash
)
{
append_string
(
dst
,
query
,
query_len
);
append_string
(
dst
,
hash
,
hash_len
);
}
else
{
append_string
(
dst
,
hash
,
hash_len
);
append_string
(
dst
,
query
,
query_len
);
}
}
else
if
(
!
(
scheme
==
URL_SCHEME_FILE
&&
(
flags
&
URL_FILE_USE_PATHURL
)))
{
UrlUnescapeW
(
url_copy
,
NULL
,
&
nLen
,
URL_UNESCAPE_INPLACE
);
if
(
query
)
append_string
(
dst
,
query
,
query_len
);
if
(
hash
)
append_string
(
dst
,
hash
,
hash_len
);
}
append_char
(
dst
,
0
);
}
HRESULT
WINAPI
UrlCanonicalizeW
(
const
WCHAR
*
src_url
,
WCHAR
*
canonicalized
,
DWORD
*
canonicalized_len
,
DWORD
flags
)
{
struct
string_buffer
rewritten
=
{
0
};
DWORD
escape_flags
;
HRESULT
hr
=
S_OK
;
const
WCHAR
*
src
;
WCHAR
*
url
,
*
dst
;
DWORD
len
;
TRACE
(
"%s, %p, %p, %#lx
\n
"
,
wine_dbgstr_w
(
src_url
),
canonicalized
,
canonicalized_len
,
flags
);
if
(
!
src_url
||
!
canonicalized
||
!
canonicalized_len
||
!*
canonicalized_len
)
return
E_INVALIDARG
;
if
(
!*
src_url
)
{
*
canonicalized
=
0
;
return
S_OK
;
}
/* PATHURL takes precedence. */
if
(
flags
&
URL_FILE_USE_PATHURL
)
flags
&=
~
URL_WININET_COMPATIBILITY
;
/* strip initial and final C0 control characters and space */
src
=
src_url
;
while
(
*
src
>
0
&&
*
src
<=
0x20
)
++
src
;
len
=
wcslen
(
src
);
while
(
len
&&
src
[
len
-
1
]
>
0
&&
src
[
len
-
1
]
<=
0x20
)
--
len
;
if
(
!
(
url
=
HeapAlloc
(
GetProcessHeap
(),
0
,
(
len
+
1
)
*
sizeof
(
WCHAR
)
)))
return
E_OUTOFMEMORY
;
dst
=
url
;
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
{
if
(
src
[
i
]
!=
'\t'
&&
src
[
i
]
!=
'\n'
&&
src
[
i
]
!=
'\r'
)
*
dst
++
=
src
[
i
];
}
*
dst
++
=
0
;
rewrite_url
(
&
rewritten
,
url
,
&
flags
);
if
(
flags
&
URL_UNESCAPE
)
{
len
=
rewritten
.
len
;
UrlUnescapeW
(
rewritten
.
string
,
NULL
,
&
len
,
URL_UNESCAPE_INPLACE
);
rewritten
.
len
=
wcslen
(
rewritten
.
string
)
+
1
;
}
/* URL_ESCAPE_SEGMENT_ONLY seems to be ignored. */
escape_flags
=
flags
&
(
URL_ESCAPE_UNSAFE
|
URL_ESCAPE_SPACES_ONLY
|
URL_ESCAPE_PERCENT
|
URL_DONT_ESCAPE_EXTRA_INFO
|
URL_ESCAPE_SEGMENT_ONLY
);
URL_DONT_ESCAPE_EXTRA_INFO
);
if
(
escape_flags
)
{
escape_flags
&=
~
URL_ESCAPE_UNSAFE
;
hr
=
UrlEscapeW
(
url_copy
,
canonicalized
,
canonicalized_len
,
escape_flags
);
hr
=
UrlEscapeW
(
rewritten
.
string
,
canonicalized
,
canonicalized_len
,
escape_flags
);
}
else
{
/* No escaping needed, just copy the string */
nLen
=
lstrlenW
(
url_copy
);
if
(
nLen
<
*
canonicalized_len
)
memcpy
(
canonicalized
,
url_copy
,
(
nLen
+
1
)
*
sizeof
(
WCHAR
));
if
(
rewritten
.
len
<=
*
canonicalized_len
)
{
memcpy
(
canonicalized
,
rewritten
.
string
,
rewritten
.
len
*
sizeof
(
WCHAR
)
);
*
canonicalized_len
=
rewritten
.
len
-
1
;
}
else
{
hr
=
E_POINTER
;
nLen
++
;
*
canonicalized_len
=
rewritten
.
len
;
}
*
canonicalized_len
=
nLen
;
}
heap_free
(
url_copy
);
heap_free
(
url
);
heap_free
(
rewritten
.
string
);
heap_free
(
url
);
if
(
hr
==
S_OK
)
TRACE
(
"result %s
\n
"
,
wine_dbgstr_w
(
canonicalized
));
...
...
@@ -4223,13 +4583,6 @@ HRESULT WINAPI UrlGetPartA(const char *url, char *out, DWORD *out_len, DWORD par
return
hr
;
}
static
const
WCHAR
*
parse_scheme
(
const
WCHAR
*
p
)
{
while
(
iswalnum
(
*
p
)
||
*
p
==
'+'
||
*
p
==
'-'
||
*
p
==
'.'
)
++
p
;
return
p
;
}
static
const
WCHAR
*
parse_url_element
(
const
WCHAR
*
url
,
const
WCHAR
*
separators
)
{
const
WCHAR
*
p
;
...
...
@@ -4239,11 +4592,6 @@ static const WCHAR *parse_url_element( const WCHAR *url, const WCHAR *separators
return
url
+
wcslen
(
url
);
}
static
BOOL
is_slash
(
char
c
)
{
return
c
==
'/'
||
c
==
'\\'
;
}
static
void
parse_url
(
const
WCHAR
*
url
,
struct
parsed_url
*
pl
)
{
const
WCHAR
*
work
;
...
...
dlls/shlwapi/tests/url.c
View file @
37aabda6
...
...
@@ -772,7 +772,7 @@ static void test_UrlGetPart(void)
}
/* ########################### */
static
void
check_url_canonicalize
(
const
char
*
url
,
DWORD
flags
,
const
char
*
expect
,
BOOL
todo
)
static
void
check_url_canonicalize
(
const
char
*
url
,
DWORD
flags
,
const
char
*
expect
)
{
char
output
[
INTERNET_MAX_URL_LENGTH
];
WCHAR
outputW
[
INTERNET_MAX_URL_LENGTH
];
...
...
@@ -788,8 +788,7 @@ static void check_url_canonicalize(const char *url, DWORD flags, const char *exp
ok
(
hr
==
E_INVALIDARG
,
"Got unexpected hr %#lx.
\n
"
,
hr
);
hr
=
UrlCanonicalizeA
(
url
,
output
,
&
size
,
flags
);
ok
(
hr
==
S_OK
||
(
!
url
[
0
]
&&
hr
==
S_FALSE
)
/* Vista+ */
,
"Got unexpected hr %#lx.
\n
"
,
hr
);
todo_wine_if
(
todo
)
ok
(
!
strcmp
(
output
,
expect
),
"Expected %s, got %s.
\n
"
,
debugstr_a
(
expect
),
debugstr_a
(
output
));
ok
(
!
strcmp
(
output
,
expect
),
"Expected %s, got %s.
\n
"
,
debugstr_a
(
expect
),
debugstr_a
(
output
));
size
=
INTERNET_MAX_URL_LENGTH
;
hr
=
UrlCanonicalizeW
(
urlW
,
NULL
,
&
size
,
flags
);
...
...
@@ -967,12 +966,11 @@ static void test_UrlCanonicalizeA(void)
const
char
*
url
;
DWORD
flags
;
const
char
*
expect
;
BOOL
todo
;
}
tests
[]
=
{
{
""
,
0
,
""
},
{
"http://www.winehq.org/tests/../tests/../.."
,
0
,
"http://www.winehq.org/"
,
TRUE
},
{
"http://www.winehq.org/tests/../tests/../.."
,
0
,
"http://www.winehq.org/"
},
{
"http://www.winehq.org/.."
,
0
,
"http://www.winehq.org/.."
},
{
"http://www.winehq.org/tests/tests2/../../tests"
,
0
,
"http://www.winehq.org/tests"
},
{
"http://www.winehq.org/tests/../tests"
,
0
,
"http://www.winehq.org/tests"
},
...
...
@@ -1066,7 +1064,7 @@ static void test_UrlCanonicalizeA(void)
{
"///A/../B"
,
URL_WININET_COMPATIBILITY
,
"///B"
},
{
"A"
,
0
,
"A"
},
{
"../A"
,
0
,
"../A"
},
{
"A/../B"
,
0
,
"B"
,
TRUE
},
{
"A/../B"
,
0
,
"B"
},
{
"/uri-res/N2R?urn:sha1:B3K"
,
URL_DONT_ESCAPE_EXTRA_INFO
|
URL_WININET_COMPATIBILITY
/*0x82000000*/
,
"/uri-res/N2R?urn:sha1:B3K"
}
/*LimeWire online installer calls this*/
,
{
"http:www.winehq.org/dir/../index.html"
,
0
,
"http:www.winehq.org/index.html"
},
{
"http://localhost/test.html"
,
URL_FILE_USE_PATHURL
,
"http://localhost/test.html"
},
...
...
@@ -1158,7 +1156,7 @@ static void test_UrlCanonicalizeA(void)
/* test url-modification */
for
(
i
=
0
;
i
<
ARRAY_SIZE
(
tests
);
i
++
)
check_url_canonicalize
(
tests
[
i
].
url
,
tests
[
i
].
flags
,
tests
[
i
].
expect
,
tests
[
i
].
todo
);
check_url_canonicalize
(
tests
[
i
].
url
,
tests
[
i
].
flags
,
tests
[
i
].
expect
);
}
/* ########################### */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment