Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wine-winehq
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wine
wine-winehq
Commits
80cf8838
Commit
80cf8838
authored
Mar 06, 2017
by
Nikolay Sivov
Committed by
Alexandre Julliard
Mar 06, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
xmllite/reader: Improve input stream encoding detection.
Signed-off-by:
Nikolay Sivov
<
nsivov@codeweavers.com
>
Signed-off-by:
Alexandre Julliard
<
julliard@winehq.org
>
parent
0aaade2c
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
8 deletions
+79
-8
reader.c
dlls/xmllite/reader.c
+13
-7
reader.c
dlls/xmllite/tests/reader.c
+66
-1
No files found.
dlls/xmllite/reader.c
View file @
80cf8838
/*
* IXmlReader implementation
*
* Copyright 2010, 2012-2013, 2016 Nikolay Sivov
* Copyright 2010, 2012-2013, 2016
-2017
Nikolay Sivov
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
...
...
@@ -93,6 +93,8 @@ static const WCHAR gtW[] = {'>',0};
/* Markup prefixes as NUL-terminated WCHAR strings: "<!--" and "<?". */
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};

/* Declared ahead of its use in readerinput_detectencoding(). */
static BOOL is_namestartchar(WCHAR ch);
static
const
char
*
debugstr_nodetype
(
XmlNodeType
nodetype
)
{
static
const
char
*
const
type_names
[]
=
...
...
@@ -840,10 +842,9 @@ static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
static
HRESULT
readerinput_detectencoding
(
xmlreaderinput
*
readerinput
,
xml_encoding
*
enc
)
{
encoded_buffer
*
buffer
=
&
readerinput
->
buffer
->
encoded
;
static
const
WCHAR
startW
[]
=
{
'<'
,
'?'
};
static
const
WCHAR
commentW
[]
=
{
'<'
,
'!'
};
static
const
char
utf8bom
[]
=
{
0xef
,
0xbb
,
0xbf
};
static
const
char
utf16lebom
[]
=
{
0xff
,
0xfe
};
WCHAR
*
ptrW
;
*
enc
=
XmlEncoding_Unknown
;
...
...
@@ -854,13 +855,17 @@ static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encod
if
(
buffer
->
written
<=
3
)
return
MX_E_INPUTEND
;
}
ptrW
=
(
WCHAR
*
)
buffer
->
data
;
/* try start symbols if we have enough data to do that; the input buffer should
already contain the first chunk */
if
(
readerinput_is_utf8
(
readerinput
))
*
enc
=
XmlEncoding_UTF8
;
else
if
(
!
memcmp
(
buffer
->
data
,
startW
,
sizeof
(
startW
))
||
!
memcmp
(
buffer
->
data
,
commentW
,
sizeof
(
commentW
)))
*
enc
=
XmlEncoding_UTF16
;
else
if
(
*
ptrW
==
'<'
)
{
ptrW
++
;
if
(
*
ptrW
==
'?'
||
*
ptrW
==
'!'
||
is_namestartchar
(
*
ptrW
))
*
enc
=
XmlEncoding_UTF16
;
}
/* try with BOM now */
else
if
(
!
memcmp
(
buffer
->
data
,
utf8bom
,
sizeof
(
utf8bom
)))
{
...
...
@@ -2492,7 +2497,8 @@ static HRESULT reader_parse_nextnode(xmlreader *reader)
/* try to detect encoding by BOM or data and set input code page */
hr
=
readerinput_detectencoding
(
reader
->
input
,
&
enc
);
TRACE
(
"detected encoding %s, 0x%08x
\n
"
,
debugstr_w
(
xml_encoding_map
[
enc
].
name
),
hr
);
TRACE
(
"detected encoding %s, 0x%08x
\n
"
,
enc
==
XmlEncoding_Unknown
?
"(unknown)"
:
debugstr_w
(
xml_encoding_map
[
enc
].
name
),
hr
);
if
(
FAILED
(
hr
))
return
hr
;
/* always switch the first time because we have to put something in */
...
...
dlls/xmllite/tests/reader.c
View file @
80cf8838
...
...
@@ -49,7 +49,7 @@ static void free_str(WCHAR *str)
static
const
char
xmldecl_full
[]
=
"
\xef\xbb\xbf
<?xml version=
\"
1.0
\"
encoding=
\"
UTF-8
\"
standalone=
\"
yes
\"
?>
\n
"
;
static
const
char
xmldecl_short
[]
=
"<?xml version=
\"
1.0
\"
?><RegistrationInfo/>"
;
static
IStream
*
create_stream_on_data
(
const
char
*
data
,
int
size
)
static
IStream
*
create_stream_on_data
(
const
void
*
data
,
unsigned
int
size
)
{
IStream
*
stream
=
NULL
;
HGLOBAL
hglobal
;
...
...
@@ -2086,6 +2086,70 @@ static void test_read_charref(void)
IStream_Release
(
stream
);
}
/* Feeds short documents, stored both as char data and as WCHAR data, to a
 * single reader and checks that the first Read() call succeeds for each.
 * The detected encoding cannot be queried back, so a successful read that
 * yields a real node type is the only available evidence. */
static void test_encoding_detection(void)
{
    /* Documents stored as WCHAR data; the third entry starts with 0xfeff. */
    static const struct encoding_testW
    {
        WCHAR text[16];
    } wide_docs[] =
    {
        { { '<','?','p','i',' ','?','>',0 } },
        { { '<','!','-','-',' ','c','-','-','>',0 } },
        { { 0xfeff,'<','a','/','>',0 } },
        { { '<','a','/','>',0 } },
    };
    /* Similar documents stored as char data. */
    static const char *narrow_docs[] =
    {
        "<?pi ?>",
        "<!-- comment -->",
        "\xef\xbb\xbf<a/>", /* UTF-8 BOM */
        "<a/>",
    };
    const unsigned int narrow_count = sizeof(narrow_docs) / sizeof(narrow_docs[0]);
    const unsigned int wide_count = sizeof(wide_docs) / sizeof(wide_docs[0]);
    IXmlReader *xml_reader;
    XmlNodeType node_type;
    IStream *input;
    unsigned int idx;
    HRESULT hres;

    hres = CreateXmlReader(&IID_IXmlReader, (void **)&xml_reader, NULL);
    ok(hres == S_OK, "S_OK, got %08x\n", hres);

    /* char-based inputs: stream size is the string length in bytes */
    for (idx = 0; idx < narrow_count; idx++)
    {
        const char *doc = narrow_docs[idx];

        input = create_stream_on_data(doc, strlen(doc));

        hres = IXmlReader_SetInput(xml_reader, (IUnknown *)input);
        ok(hres == S_OK, "got %08x\n", hres);

        /* reset so a stale value from the previous document can't pass */
        node_type = XmlNodeType_None;
        hres = IXmlReader_Read(xml_reader, &node_type);
        ok(hres == S_OK, "got %08x\n", hres);
        ok(node_type != XmlNodeType_None, "Unexpected node type %d\n", node_type);

        IStream_Release(input);
    }

    /* WCHAR-based inputs: stream size excludes the terminating 0 */
    for (idx = 0; idx < wide_count; idx++)
    {
        const WCHAR *doc = wide_docs[idx].text;

        input = create_stream_on_data(doc, lstrlenW(doc) * sizeof(WCHAR));

        hres = IXmlReader_SetInput(xml_reader, (IUnknown *)input);
        ok(hres == S_OK, "got %08x\n", hres);

        node_type = XmlNodeType_None;
        hres = IXmlReader_Read(xml_reader, &node_type);
        ok(hres == S_OK, "%u: got %08x\n", idx, hres);
        ok(node_type != XmlNodeType_None, "%u: unexpected node type %d\n", idx, node_type);

        IStream_Release(input);
    }

    IXmlReader_Release(xml_reader);
}
START_TEST
(
reader
)
{
test_reader_create
();
...
...
@@ -2108,4 +2172,5 @@ START_TEST(reader)
test_prefix
();
test_namespaceuri
();
test_read_charref
();
test_encoding_detection
();
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment