Commit d6937901, authored by Nikolay Sivov, committed by Alexandre Julliard

xmllite: Initial support for reader input encoding detection.

parent d16f4e70
...@@ -117,6 +117,14 @@ static inline void *m_alloc(IMalloc *imalloc, size_t len) ...@@ -117,6 +117,14 @@ static inline void *m_alloc(IMalloc *imalloc, size_t len)
return heap_alloc(len); return heap_alloc(len);
} }
/* Resizes a memory block: goes through the supplied IMalloc when there is one,
   and through heap_realloc() when imalloc is NULL. */
static inline void *m_realloc(IMalloc *imalloc, void *mem, size_t len)
{
    /* no custom allocator given, use the heap helper */
    if (!imalloc)
        return heap_realloc(mem, len);

    return IMalloc_Realloc(imalloc, mem, len);
}
static inline void m_free(IMalloc *imalloc, void *mem) static inline void m_free(IMalloc *imalloc, void *mem)
{ {
if (imalloc) if (imalloc)
...@@ -142,6 +150,11 @@ static inline void *readerinput_alloc(xmlreaderinput *input, size_t len) ...@@ -142,6 +150,11 @@ static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
return m_alloc(input->imalloc, len); return m_alloc(input->imalloc, len);
} }
/* Resizes a memory block with the allocator stored in the reader input
   (input->imalloc), forwarding to m_realloc(). */
static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
{
    void *resized = m_realloc(input->imalloc, mem, len);

    return resized;
}
static inline void readerinput_free(xmlreaderinput *input, void *mem) static inline void readerinput_free(xmlreaderinput *input, void *mem)
{ {
return m_free(input->imalloc, mem); return m_free(input->imalloc, mem);
...@@ -165,7 +178,7 @@ static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer) ...@@ -165,7 +178,7 @@ static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
readerinput_free(input, buffer->data); readerinput_free(input, buffer->data);
} }
static HRESULT get_code_page(xml_encoding encoding, UINT *cp) static HRESULT get_code_page(xml_encoding encoding, xmlreaderinput *input)
{ {
const struct xml_encoding_data *data; const struct xml_encoding_data *data;
...@@ -176,12 +189,12 @@ static HRESULT get_code_page(xml_encoding encoding, UINT *cp) ...@@ -176,12 +189,12 @@ static HRESULT get_code_page(xml_encoding encoding, UINT *cp)
} }
data = &xml_encoding_map[encoding]; data = &xml_encoding_map[encoding];
*cp = data->cp; input->buffer->code_page = data->cp;
return S_OK; return S_OK;
} }
static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding) static HRESULT alloc_input_buffer(xmlreaderinput *input)
{ {
input_buffer *buffer; input_buffer *buffer;
HRESULT hr; HRESULT hr;
...@@ -192,29 +205,20 @@ static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding) ...@@ -192,29 +205,20 @@ static HRESULT alloc_input_buffer(xmlreaderinput *input, xml_encoding encoding)
if (!buffer) return E_OUTOFMEMORY; if (!buffer) return E_OUTOFMEMORY;
buffer->input = input; buffer->input = input;
hr = get_code_page(encoding, &buffer->code_page); buffer->code_page = ~0; /* code page is unknown at this point */
hr = init_encoded_buffer(input, &buffer->utf16);
if (hr != S_OK) { if (hr != S_OK) {
readerinput_free(input, buffer); readerinput_free(input, buffer);
return hr; return hr;
} }
hr = init_encoded_buffer(input, &buffer->utf16); hr = init_encoded_buffer(input, &buffer->encoded);
if (hr != S_OK) { if (hr != S_OK) {
free_encoded_buffer(input, &buffer->utf16);
readerinput_free(input, buffer); readerinput_free(input, buffer);
return hr; return hr;
} }
if (encoding != XmlEncoding_UTF16) {
hr = init_encoded_buffer(input, &buffer->encoded);
if (hr != S_OK) {
free_encoded_buffer(input, &buffer->utf16);
readerinput_free(input, buffer);
return hr;
}
}
else
memset(&buffer->encoded, 0, sizeof(buffer->encoded));
input->buffer = buffer; input->buffer = buffer;
return S_OK; return S_OK;
} }
...@@ -226,7 +230,7 @@ static void free_input_buffer(input_buffer *buffer) ...@@ -226,7 +230,7 @@ static void free_input_buffer(input_buffer *buffer)
readerinput_free(buffer->input, buffer); readerinput_free(buffer->input, buffer);
} }
static void xmlreaderinput_release_stream(xmlreaderinput *readerinput) static void readerinput_release_stream(xmlreaderinput *readerinput)
{ {
if (readerinput->stream) { if (readerinput->stream) {
ISequentialStream_Release(readerinput->stream); ISequentialStream_Release(readerinput->stream);
...@@ -236,11 +240,11 @@ static void xmlreaderinput_release_stream(xmlreaderinput *readerinput) ...@@ -236,11 +240,11 @@ static void xmlreaderinput_release_stream(xmlreaderinput *readerinput)
/* Queries already stored interface for IStream/ISequentialStream. /* Queries already stored interface for IStream/ISequentialStream.
Interface supplied on creation will be overwritten */ Interface supplied on creation will be overwritten */
static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput) static HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
{ {
HRESULT hr; HRESULT hr;
xmlreaderinput_release_stream(readerinput); readerinput_release_stream(readerinput);
hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream); hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
if (hr != S_OK) if (hr != S_OK)
hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream); hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
...@@ -248,6 +252,59 @@ static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput) ...@@ -248,6 +252,59 @@ static HRESULT xmlreaderinput_query_for_stream(xmlreaderinput *readerinput)
return hr; return hr;
} }
/* Reads a chunk from the input stream into the raw (still encoded) buffer.
 *
 * New data is appended after the bytes already stored and buffer->written is
 * advanced by the number of bytes actually read. The buffer is doubled first
 * when it is full, or when the free space is not a multiple of 4 bytes.
 *
 * Returns the result of ISequentialStream_Read(), or E_OUTOFMEMORY when the
 * buffer can't be grown; in that case the existing buffer is left untouched.
 */
static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
{
    encoded_buffer *buffer = &readerinput->buffer->encoded;
    ULONG avail = buffer->allocated - buffer->written;
    ULONG len, read = 0;
    HRESULT hr;

    /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
       variable width encodings like UTF-8 */
    len = (avail + 3) & ~3;

    /* try to use allocated space or grow; a completely full buffer has to grow too,
       otherwise we'd keep asking the stream for zero bytes */
    if (!avail || avail < len)
    {
        void *grown;

        /* doubling must actually give more room (guards against zero size and wraparound) */
        if (buffer->allocated * 2 <= buffer->allocated) return E_OUTOFMEMORY;

        /* keep the old pointer and size until reallocation is known to have succeeded */
        grown = readerinput_realloc(readerinput, buffer->data, buffer->allocated * 2);
        if (!grown) return E_OUTOFMEMORY;

        buffer->data = grown;
        buffer->allocated *= 2;
        len = buffer->allocated - buffer->written;
    }

    hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
    if (FAILED(hr)) return hr;

    TRACE("requested %d, read %d, ret 0x%08x\n", len, read, hr);
    buffer->written += read;

    return hr;
}
/* Guesses the encoding of the input from the first bytes of the raw buffer.
 *
 * The buffer is expected to contain the first chunk of the stream already.
 * Signatures are tried in this order, each one only when enough bytes have
 * been read to hold it:
 *   - "<?xm" as single bytes          -> XmlEncoding_UTF8
 *   - "<?" as two WCHARs              -> XmlEncoding_UTF16
 *   - UTF-8 byte order mark (3 bytes) -> XmlEncoding_UTF8
 *   - 0xff 0xfe byte order mark       -> XmlEncoding_UTF16
 *
 * Returns XmlEncoding_Unknown when nothing matches.
 */
static xml_encoding readerinput_detectencoding(xmlreaderinput *readerinput)
{
    /* raw byte signatures; BOM bytes are above 0x7f so they must not be stored as plain char */
    static const char startA[] = {'<','?','x','m'};
    static const WCHAR startW[] = {'<','?'};
    static const unsigned char utf8bom[] = {0xef,0xbb,0xbf};
    static const unsigned char utf16lebom[] = {0xff,0xfe};
    const encoded_buffer *buffer = &readerinput->buffer->encoded;

    /* try start symbols if we have enough data to do that */
    if (buffer->written >= sizeof(startA) && !memcmp(buffer->data, startA, sizeof(startA)))
        return XmlEncoding_UTF8;
    if (buffer->written >= sizeof(startW) && !memcmp(buffer->data, startW, sizeof(startW)))
        return XmlEncoding_UTF16;

    /* try with BOM now; every mark is checked against its own length, so a two byte
       UTF-16 mark is recognized even when nothing follows it yet */
    if (buffer->written >= sizeof(utf8bom) && !memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
        return XmlEncoding_UTF8;
    if (buffer->written >= sizeof(utf16lebom) && !memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
        return XmlEncoding_UTF16;

    return XmlEncoding_Unknown;
}
static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject) static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
{ {
xmlreader *This = impl_from_IXmlReader(iface); xmlreader *This = impl_from_IXmlReader(iface);
...@@ -305,7 +362,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input) ...@@ -305,7 +362,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
if (This->input) if (This->input)
{ {
xmlreaderinput_release_stream(This->input); readerinput_release_stream(This->input);
IUnknown_Release(&This->input->IXmlReaderInput_iface); IUnknown_Release(&This->input->IXmlReaderInput_iface);
This->input = NULL; This->input = NULL;
} }
...@@ -333,7 +390,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input) ...@@ -333,7 +390,7 @@ static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
} }
/* set stream for supplied IXmlReaderInput */ /* set stream for supplied IXmlReaderInput */
hr = xmlreaderinput_query_for_stream(This->input); hr = readerinput_query_for_stream(This->input);
if (hr == S_OK) if (hr == S_OK)
This->state = XmlReadState_Initial; This->state = XmlReadState_Initial;
...@@ -386,7 +443,27 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO ...@@ -386,7 +443,27 @@ static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LO
static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type) static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *node_type)
{ {
FIXME("(%p %p): stub\n", iface, node_type); xmlreader *This = impl_from_IXmlReader(iface);
FIXME("(%p)->(%p): stub\n", This, node_type);
if (This->state == XmlReadState_Closed) return S_FALSE;
/* if it's a first call for a new input we need to detect stream encoding */
if (This->state == XmlReadState_Initial)
{
xml_encoding enc;
HRESULT hr;
hr = readerinput_growraw(This->input);
if (FAILED(hr)) return hr;
/* try to detect encoding by BOM or data and set input code page */
enc = readerinput_detectencoding(This->input);
TRACE("detected encoding %d\n", enc);
get_code_page(enc, This->input);
}
return E_NOTIMPL; return E_NOTIMPL;
} }
...@@ -683,7 +760,7 @@ HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream, ...@@ -683,7 +760,7 @@ HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
readerinput->stream = NULL; readerinput->stream = NULL;
if (imalloc) IMalloc_AddRef(imalloc); if (imalloc) IMalloc_AddRef(imalloc);
hr = alloc_input_buffer(readerinput, XmlEncoding_UTF16); hr = alloc_input_buffer(readerinput);
if (hr != S_OK) if (hr != S_OK)
{ {
readerinput_free(readerinput, readerinput); readerinput_free(readerinput, readerinput);
......
...@@ -551,15 +551,24 @@ static void test_readerinput(void) ...@@ -551,15 +551,24 @@ static void test_readerinput(void)
static void test_reader_state(void) static void test_reader_state(void)
{ {
IXmlReader *reader; IXmlReader *reader;
XmlNodeType nodetype;
HRESULT hr; HRESULT hr;
hr = pCreateXmlReader(&IID_IXmlReader, (LPVOID*)&reader, NULL); hr = pCreateXmlReader(&IID_IXmlReader, (void**)&reader, NULL);
ok(hr == S_OK, "Expected S_OK, got %08x\n", hr); ok(hr == S_OK, "Expected S_OK, got %08x\n", hr);
/* invalid arguments */ /* invalid arguments */
hr = IXmlReader_GetProperty(reader, XmlReaderProperty_ReadState, NULL); hr = IXmlReader_GetProperty(reader, XmlReaderProperty_ReadState, NULL);
ok(hr == E_INVALIDARG, "Expected E_INVALIDARG, got %08x\n", hr); ok(hr == E_INVALIDARG, "Expected E_INVALIDARG, got %08x\n", hr);
/* attempt to read on closed reader */
test_read_state(reader, XmlReadState_Closed, -1, 0);
if (0)
{
/* newer versions crash here, probably cause no input was set */
hr = IXmlReader_Read(reader, &nodetype);
ok(hr == S_FALSE, "got %08x\n", hr);
}
IXmlReader_Release(reader); IXmlReader_Release(reader);
} }
......
...@@ -27,6 +27,11 @@ static inline void *heap_alloc(size_t len) ...@@ -27,6 +27,11 @@ static inline void *heap_alloc(size_t len)
return HeapAlloc(GetProcessHeap(), 0, len); return HeapAlloc(GetProcessHeap(), 0, len);
} }
/* Resizes a block allocated from the process heap.
 *
 * mem: block to resize, or NULL to get a fresh allocation.
 * len: requested size in bytes.
 *
 * Returns the resized block (which may have moved), or NULL on failure.
 */
static inline void *heap_realloc(void *mem, size_t len)
{
    /* HeapReAlloc() needs an existing block; allocating for NULL keeps this in line
       with the IMalloc_Realloc() path this helper substitutes for */
    if (!mem)
        return HeapAlloc(GetProcessHeap(), 0, len);

    return HeapReAlloc(GetProcessHeap(), 0, mem, len);
}
static inline BOOL heap_free(void *mem) static inline BOOL heap_free(void *mem)
{ {
return HeapFree(GetProcessHeap(), 0, mem); return HeapFree(GetProcessHeap(), 0, mem);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment