Commit 61483a58 authored by Nikolay Sivov's avatar Nikolay Sivov Committed by Alexandre Julliard

xmllite: Track internal parser state to select expected node type.

parent ef56b7a4
......@@ -44,6 +44,14 @@ typedef enum
XmlEncoding_Unknown
} xml_encoding;
typedef enum
{
XmlReadInState_Initial,
XmlReadInState_XmlDecl,
XmlReadInState_Misc_DTD,
XmlReadInState_DTD
} XmlReaderInternalState;
static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
static const WCHAR utf8W[] = {'U','T','F','-','8',0};
......@@ -109,6 +117,7 @@ typedef struct _xmlreader
xmlreaderinput *input;
IMalloc *imalloc;
XmlReadState state;
XmlReaderInternalState instate;
XmlNodeType nodetype;
DtdProcessing dtdmode;
UINT line, pos; /* reader position in XML stream */
......@@ -457,6 +466,7 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
encoded_buffer *dest = &readerinput->buffer->utf16;
int len, dest_len;
HRESULT hr;
WCHAR *ptr;
UINT cp;
hr = get_code_page(enc, &cp);
......@@ -477,8 +487,9 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
dest_len = MultiByteToWideChar(cp, 0, src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
MultiByteToWideChar(cp, 0, src->cur, len, (WCHAR*)dest->data, dest_len);
dest->data[dest_len] = 0;
ptr = (WCHAR*)dest->data;
MultiByteToWideChar(cp, 0, src->cur, len, ptr, dest_len);
ptr[dest_len] = 0;
readerinput->buffer->code_page = cp;
}
......@@ -515,13 +526,18 @@ static void reader_skipn(xmlreader *reader, int n)
}
}
static inline int is_wchar_space(WCHAR ch)
{
return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
}
/* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
static int reader_skipspaces(xmlreader *reader)
{
encoded_buffer *buffer = &reader->input->buffer->utf16;
const WCHAR *ptr = reader_get_cur(reader), *start = ptr;
while (*ptr == ' ' || *ptr == '\t' || *ptr == '\r' || *ptr == '\n')
while (is_wchar_space(*ptr))
{
buffer->cur += sizeof(WCHAR);
if (*ptr == '\r')
......@@ -753,9 +769,105 @@ static HRESULT reader_parse_xmldecl(xmlreader *reader)
if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
reader_skipn(reader, 2);
reader->nodetype = XmlNodeType_XmlDeclaration;
return S_OK;
}
/* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
static HRESULT reader_parse_comment(xmlreader *reader)
{
FIXME("comments not supported\n");
return E_NOTIMPL;
}
/* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
static HRESULT reader_parse_pi(xmlreader *reader)
{
FIXME("PI not supported\n");
return E_NOTIMPL;
}
/* [27] Misc ::= Comment | PI | S */
static HRESULT reader_parse_misc(xmlreader *reader)
{
HRESULT hr = S_FALSE;
while (1)
{
static const WCHAR commentW[] = {'<','!','-','-',0};
static const WCHAR piW[] = {'<','?',0};
const WCHAR *cur = reader_get_cur(reader);
if (is_wchar_space(*cur))
reader_skipspaces(reader);
else if (!reader_cmp(reader, commentW))
hr = reader_parse_comment(reader);
else if (!reader_cmp(reader, piW))
hr = reader_parse_pi(reader);
else
break;
if (FAILED(hr)) return hr;
cur = reader_get_cur(reader);
}
return hr;
}
static HRESULT reader_parse_nextnode(xmlreader *reader)
{
HRESULT hr;
while (1)
{
switch (reader->instate)
{
/* if it's a first call for a new input we need to detect stream encoding */
case XmlReadInState_Initial:
{
xml_encoding enc;
hr = readerinput_growraw(reader->input);
if (FAILED(hr)) return hr;
/* try to detect encoding by BOM or data and set input code page */
hr = readerinput_detectencoding(reader->input, &enc);
TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
if (FAILED(hr)) return hr;
/* always switch first time cause we have to put something in */
readerinput_switchencoding(reader->input, enc);
/* parse xml declaration */
hr = reader_parse_xmldecl(reader);
if (FAILED(hr)) return hr;
reader->instate = XmlReadInState_Misc_DTD;
if (hr == S_OK) return hr;
}
break;
case XmlReadInState_Misc_DTD:
hr = reader_parse_misc(reader);
if (FAILED(hr)) return hr;
if (hr == S_FALSE)
{
reader->instate = XmlReadInState_DTD;
return S_OK;
}
break;
case XmlReadInState_DTD:
FIXME("DTD parsing not supported\n");
return E_NOTIMPL;
default:
FIXME("internal state %d not handled\n", reader->instate);
return E_NOTIMPL;
}
}
return E_NOTIMPL;
}
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
{
xmlreader *This = impl_from_IXmlReader(iface);
......@@ -844,7 +956,10 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
/* set stream for supplied IXmlReaderInput */
hr = readerinput_query_for_stream(This->input);
if (hr == S_OK)
{
This->state = XmlReadState_Initial;
This->instate = XmlReadInState_Initial;
}
return hr;
}
......@@ -893,44 +1008,22 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
return S_OK;
}
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
{
xmlreader *This = impl_from_IXmlReader(iface);
XmlNodeType oldtype = This->nodetype;
HRESULT hr;
FIXME("(%p)->(%p): stub\n", This, node_type);
FIXME("(%p)->(%p): stub\n", This, nodetype);
if (This->state == XmlReadState_Closed) return S_FALSE;
/* if it's a first call for a new input we need to detect stream encoding */
if (This->state == XmlReadState_Initial)
{
xml_encoding enc;
HRESULT hr;
hr = readerinput_growraw(This->input);
if (FAILED(hr)) return hr;
/* try to detect encoding by BOM or data and set input code page */
hr = readerinput_detectencoding(This->input, &enc);
TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
if (FAILED(hr)) return hr;
/* always switch first time cause we have to put something in */
readerinput_switchencoding(This->input, enc);
hr = reader_parse_nextnode(This);
if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
This->state = XmlReadState_Interactive;
if (hr == S_OK) *nodetype = This->nodetype;
/* parse xml declaration */
hr = reader_parse_xmldecl(This);
if (FAILED(hr)) return hr;
if (hr == S_OK)
{
This->state = XmlReadState_Interactive;
This->nodetype = *node_type = XmlNodeType_XmlDeclaration;
return S_OK;
}
}
return E_NOTIMPL;
return hr;
}
static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
......@@ -1223,6 +1316,7 @@ HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
reader->ref = 1;
reader->input = NULL;
reader->state = XmlReadState_Closed;
reader->instate = XmlReadInState_Initial;
reader->dtdmode = DtdProcessing_Prohibit;
reader->line = reader->pos = 0;
reader->imalloc = imalloc;
......
......@@ -497,6 +497,7 @@ static void test_readerinput(void)
IStream_Release(stream);
/* test input interface selection sequence */
input = NULL;
hr = testinput_createinstance((void**)&input);
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
......@@ -688,10 +689,70 @@ todo_wine {
ok(hr == S_OK, "got %08x\n", hr);
ok(type == XmlNodeType_XmlDeclaration, "got %d\n", type);
type = XmlNodeType_XmlDeclaration;
hr = IXmlReader_Read(reader, &type);
/* newer versions return syntax error here cause document is incomplete,
it makes more sense than invalid char error */
todo_wine {
ok(hr == WC_E_SYNTAX || broken(hr == WC_E_XMLCHARACTER), "got 0x%08x\n", hr);
ok(type == XmlNodeType_None, "got %d\n", type);
}
IStream_Release(stream);
IXmlReader_Release(reader);
}
static const char xml_comment[] = "\xef\xbb\xbf<!-- comment -->";
static const char xml_comment1[] = "\xef\xbb\xbf<!-- - comment-->";
static const char xml_comment2[] = "\xef\xbb\xbf<!-- -- comment-->";
static void test_read_comment(void)
{
HRESULT hr;
IStream *stream;
IXmlReader *reader;
XmlNodeType type;
hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
ok(hr == S_OK, "S_OK, got %08x\n", hr);
stream = create_stream_on_data(xml_comment, sizeof(xml_comment));
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
ok(hr == S_OK, "got %08x\n", hr);
type = XmlNodeType_None;
hr = IXmlReader_Read(reader, &type);
todo_wine {
ok(hr == S_OK, "got %08x\n", hr);
ok(type == XmlNodeType_Comment, "got %d\n", type);
}
IStream_Release(stream);
stream = create_stream_on_data(xml_comment1, sizeof(xml_comment1));
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
ok(hr == S_OK, "got %08x\n", hr);
type = XmlNodeType_None;
hr = IXmlReader_Read(reader, &type);
todo_wine {
ok(hr == S_OK, "got %08x\n", hr);
ok(type == XmlNodeType_Comment, "got %d\n", type);
}
IStream_Release(stream);
stream = create_stream_on_data(xml_comment2, sizeof(xml_comment2));
hr = IXmlReader_SetInput(reader, (IUnknown*)stream);
ok(hr == S_OK, "got %08x\n", hr);
type = XmlNodeType_None;
hr = IXmlReader_Read(reader, &type);
todo_wine
ok(hr == WC_E_COMMENT || broken(hr == WC_E_GREATERTHAN), "got %08x\n", hr);
ok(type == XmlNodeType_None, "got %d\n", type);
IStream_Release(stream);
IXmlReader_Release(reader);
}
START_TEST(reader)
{
HRESULT r;
......@@ -708,6 +769,7 @@ START_TEST(reader)
test_reader_create();
test_readerinput();
test_reader_state();
test_read_comment();
test_read_xmldeclaration();
CoUninitialize();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment