Commit ce71c50f authored by Hans Leidekker's avatar Hans Leidekker Committed by Alexandre Julliard

webservices: Add support for character set detection.

parent 2eab7118
......@@ -551,6 +551,17 @@ HRESULT WINAPI WsGetReaderProperty( WS_XML_READER *handle, WS_XML_READER_PROPERT
if (error) FIXME( "ignoring error parameter\n" );
if (!reader->input_data) return WS_E_INVALID_OPERATION;
if (id == WS_XML_READER_PROPERTY_CHARSET)
{
WS_CHARSET charset;
HRESULT hr;
if ((hr = get_reader_prop( reader, id, &charset, size )) != S_OK) return hr;
if (!charset) return WS_E_INVALID_FORMAT;
*(WS_CHARSET *)buf = charset;
return S_OK;
}
return get_reader_prop( reader, id, buf, size );
}
......@@ -1373,6 +1384,54 @@ HRESULT WINAPI WsSetErrorProperty( WS_ERROR *handle, WS_ERROR_PROPERTY_ID id, co
return set_error_prop( error, id, value, size );
}
static inline BOOL is_utf8( const unsigned char *data, ULONG size, ULONG *offset )
{
static const char bom[] = {0xef,0xbb,0xbf};
const unsigned char *p = data;
return (size >= sizeof(bom) && !memcmp( p, bom, sizeof(bom) ) && (*offset = sizeof(bom))) ||
(size > 2 && !(*offset = 0));
}
static inline BOOL is_utf16le( const unsigned char *data, ULONG size, ULONG *offset )
{
static const char bom[] = {0xff,0xfe};
const unsigned char *p = data;
return (size >= sizeof(bom) && !memcmp( p, bom, sizeof(bom) ) && (*offset = sizeof(bom))) ||
(size >= 4 && p[0] == '<' && !p[1] && !(*offset = 0));
}
static HRESULT detect_charset( const WS_XML_READER_INPUT *input, WS_CHARSET *charset, ULONG *offset )
{
const WS_XML_READER_BUFFER_INPUT *buf = (const WS_XML_READER_BUFFER_INPUT *)input;
if (input->inputType != WS_XML_READER_INPUT_TYPE_BUFFER)
{
FIXME( "charset detection on input type %u not supported\n", input->inputType );
return E_NOTIMPL;
}
/* FIXME: parse xml declaration */
if (is_utf16le( buf->encodedData, buf->encodedDataSize, offset ))
{
*charset = WS_CHARSET_UTF16LE;
}
else if (is_utf8( buf->encodedData, buf->encodedDataSize, offset ))
{
*charset = WS_CHARSET_UTF8;
}
else
{
FIXME( "charset not recognized\n" );
*charset = 0;
}
TRACE( "detected charset %u\n", *charset );
return S_OK;
}
/**************************************************************************
* WsSetInput [webservices.@]
*/
......@@ -1383,7 +1442,7 @@ HRESULT WINAPI WsSetInput( WS_XML_READER *handle, const WS_XML_READER_ENCODING *
struct reader *reader = (struct reader *)handle;
struct node *node;
HRESULT hr;
ULONG i;
ULONG i, offset = 0;
TRACE( "%p %p %p %p %u %p\n", handle, encoding, input, properties, count, error );
if (error) FIXME( "ignoring error parameter\n" );
......@@ -1395,11 +1454,13 @@ HRESULT WINAPI WsSetInput( WS_XML_READER *handle, const WS_XML_READER_ENCODING *
case WS_XML_READER_ENCODING_TYPE_TEXT:
{
WS_XML_READER_TEXT_ENCODING *text = (WS_XML_READER_TEXT_ENCODING *)encoding;
if (text->charSet != WS_CHARSET_UTF8)
{
FIXME( "charset %u not supported\n", text->charSet );
return E_NOTIMPL;
}
WS_CHARSET charset = text->charSet;
if (charset == WS_CHARSET_AUTO && (hr = detect_charset( input, &charset, &offset )) != S_OK)
return hr;
hr = set_reader_prop( reader, WS_XML_READER_PROPERTY_CHARSET, &charset, sizeof(charset) );
if (hr != S_OK) return hr;
break;
}
default:
......@@ -1411,8 +1472,8 @@ HRESULT WINAPI WsSetInput( WS_XML_READER *handle, const WS_XML_READER_ENCODING *
case WS_XML_READER_INPUT_TYPE_BUFFER:
{
WS_XML_READER_BUFFER_INPUT *buf = (WS_XML_READER_BUFFER_INPUT *)input;
reader->input_data = buf->encodedData;
reader->input_size = buf->encodedDataSize;
reader->input_data = (const char *)buf->encodedData + offset;
reader->input_size = buf->encodedDataSize - offset;
reader->read_bufptr = reader->input_data;
break;
}
......
......@@ -25,7 +25,7 @@ static const char data1[] =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>";
static const char data2[] =
"<text>test</text>";
{0xef,0xbb,0xbf,'<','t','e','x','t','>','t','e','s','t','<','/','t','e','x','t','>',0};
static const char data3[] =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>"
......@@ -226,7 +226,7 @@ static HRESULT set_input( WS_XML_READER *reader, const char *data, ULONG size )
WS_XML_READER_BUFFER_INPUT input;
encoding.encoding.encodingType = WS_XML_READER_ENCODING_TYPE_TEXT;
encoding.charSet = WS_CHARSET_UTF8;
encoding.charSet = WS_CHARSET_AUTO;
input.input.inputType = WS_XML_READER_INPUT_TYPE_BUFFER;
input.encodedData = (void *)data;
......@@ -367,6 +367,31 @@ static void test_WsCreateReader(void)
static void test_WsSetInput(void)
{
static char test1[] = {0xef,0xbb,0xbf,'<','a','/','>'};
static char test2[] = {'<','a','/','>'};
static char test3[] = {'<','!','-','-'};
static char test4[] = {'<','?','x','m','l',' ','v','e','r','s','i','o','n','=','"','1','.','0','"',
' ','e','n','c','o','d','i','n','g','=','"','u','t','f','-','8','"','?','>'};
static char test5[] = {'<','?','x','m','l',' ','e','n','c','o','d','i','n','g','=',
'"','u','t','f','-','8','"','?','>'};
static char test6[] = {'<','?','x','m','l'};
static char test7[] = {'<','?','y','m','l'};
static char test8[] = {'<','?'};
static char test9[] = {'<','!'};
static char test10[] = {0xff,0xfe,'<',0,'a',0,'/',0,'>',0};
static char test11[] = {'<',0,'a',0,'/',0,'>',0};
static char test12[] = {'<',0,'!',0,'-',0,'-',0};
static char test13[] = {'<',0,'?',0};
static char test14[] = {'a','b'};
static char test15[] = {'a','b','c'};
static char test16[] = {'a',0};
static char test17[] = {'a',0,'b',0};
static char test18[] = {'<',0,'a',0,'b',0};
static char test19[] = {'<',0,'a',0};
static char test20[] = {0,'a','b'};
static char test21[] = {0,0};
static char test22[] = {0,0,0};
static char test23[] = {'<',0,'?',0,'x',0,'m',0,'l',0};
HRESULT hr;
WS_XML_READER *reader;
WS_XML_READER_PROPERTY prop;
......@@ -374,7 +399,41 @@ static void test_WsSetInput(void)
WS_XML_READER_BUFFER_INPUT input;
WS_CHARSET charset;
const WS_XML_NODE *node;
ULONG size, max_depth;
ULONG i, size, max_depth;
static const struct
{
void *data;
ULONG size;
HRESULT hr;
WS_CHARSET charset;
int todo;
}
tests[] =
{
{ test1, sizeof(test1), S_OK, WS_CHARSET_UTF8 },
{ test2, sizeof(test2), S_OK, WS_CHARSET_UTF8 },
{ test3, sizeof(test3), S_OK, WS_CHARSET_UTF8 },
{ test4, sizeof(test4), S_OK, WS_CHARSET_UTF8 },
{ test5, sizeof(test5), WS_E_INVALID_FORMAT, 0, 1 },
{ test6, sizeof(test6), WS_E_INVALID_FORMAT, 0, 1 },
{ test7, sizeof(test7), WS_E_INVALID_FORMAT, 0, 1 },
{ test8, sizeof(test8), WS_E_INVALID_FORMAT, 0 },
{ test9, sizeof(test9), WS_E_INVALID_FORMAT, 0 },
{ test10, sizeof(test10), S_OK, WS_CHARSET_UTF16LE },
{ test11, sizeof(test11), S_OK, WS_CHARSET_UTF16LE },
{ test12, sizeof(test12), S_OK, WS_CHARSET_UTF16LE },
{ test13, sizeof(test13), WS_E_INVALID_FORMAT, 0, 1 },
{ test14, sizeof(test14), WS_E_INVALID_FORMAT, 0 },
{ test15, sizeof(test15), S_OK, WS_CHARSET_UTF8 },
{ test16, sizeof(test16), WS_E_INVALID_FORMAT, 0 },
{ test17, sizeof(test17), S_OK, WS_CHARSET_UTF8 },
{ test18, sizeof(test18), S_OK, WS_CHARSET_UTF16LE },
{ test19, sizeof(test19), S_OK, WS_CHARSET_UTF16LE },
{ test20, sizeof(test20), S_OK, WS_CHARSET_UTF8 },
{ test21, sizeof(test21), WS_E_INVALID_FORMAT, 0 },
{ test22, sizeof(test22), S_OK, WS_CHARSET_UTF8 },
{ test23, sizeof(test23), WS_E_INVALID_FORMAT, 0, 1 },
};
hr = WsCreateReader( NULL, 0, &reader, NULL ) ;
ok( hr == S_OK, "got %08x\n", hr );
......@@ -411,14 +470,30 @@ static void test_WsSetInput(void)
/* charset is detected by WsSetInput */
enc.encoding.encodingType = WS_XML_READER_ENCODING_TYPE_TEXT;
enc.charSet = WS_CHARSET_AUTO;
hr = WsSetInput( reader, (WS_XML_READER_ENCODING *)&enc, (WS_XML_READER_INPUT *)&input, NULL, 0, NULL );
todo_wine ok( hr == S_OK, "got %08x\n", hr );
charset = 0xdeadbeef;
size = sizeof(charset);
hr = WsGetReaderProperty( reader, WS_XML_READER_PROPERTY_CHARSET, &charset, size, NULL );
ok( hr == S_OK, "got %08x\n", hr );
ok( charset == WS_CHARSET_UTF8, "got %u\n", charset );
for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
{
input.encodedData = tests[i].data;
input.encodedDataSize = tests[i].size;
hr = WsSetInput( reader, (WS_XML_READER_ENCODING *)&enc, (WS_XML_READER_INPUT *)&input, NULL, 0, NULL );
ok( hr == S_OK, "%u: got %08x\n", i, hr );
charset = 0xdeadbeef;
size = sizeof(charset);
hr = WsGetReaderProperty( reader, WS_XML_READER_PROPERTY_CHARSET, &charset, size, NULL );
if (tests[i].todo)
{
todo_wine ok( hr == tests[i].hr, "%u: got %08x expected %08x\n", i, hr, tests[i].hr );
if (hr == S_OK)
todo_wine ok( charset == tests[i].charset, "%u: got %u expected %u\n", i, charset, tests[i].charset );
}
else
{
ok( hr == tests[i].hr, "%u: got %08x expected %08x\n", i, hr, tests[i].hr );
if (hr == S_OK)
ok( charset == tests[i].charset, "%u: got %u expected %u\n", i, charset, tests[i].charset );
}
}
enc.encoding.encodingType = WS_XML_READER_ENCODING_TYPE_TEXT;
enc.charSet = WS_CHARSET_UTF8;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment