Commit a04064b8, authored by Nikolay Sivov, committed by Alexandre Julliard

xmllite: Improve attribute value parsing.

parent e51ab3c0
...@@ -1766,12 +1766,99 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc ...@@ -1766,12 +1766,99 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc
return S_OK; return S_OK;
} }
/* Attribute value whitespace normalization for the character at 'ptr'.
   Nothing happens unless that character is whitespace. When it is:
   - a '\r' immediately followed by '\n' is collapsed first, by moving the
     text that follows the pair one position down so it overwrites the '\n';
   - the character at 'ptr' is then replaced with ' '.
   The net effect is that any whitespace char, or a "\r\n" pair, becomes a
   single space. */
static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
{
    if (is_wchar_space(*ptr))
    {
        if (ptr[0] == '\r' && ptr[1] == '\n')
        {
            encoded_buffer *utf16 = &reader->input->buffer->utf16;
            WCHAR *tail = ptr + 2;
            /* bytes of buffered data located after the "\r\n" pair */
            int tail_bytes = utf16->written - ((char*)tail - utf16->data);

            memmove(ptr + 1, tail, tail_bytes);
        }

        *ptr = ' ';
    }
}
/* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
[67] Reference ::= EntityRef | CharRef [67] Reference ::= EntityRef | CharRef
[68] EntityRef ::= '&' Name ';' */ [68] EntityRef ::= '&' Name ';' */
static HRESULT reader_parse_reference(xmlreader *reader) static HRESULT reader_parse_reference(xmlreader *reader)
{ {
FIXME("References not supported\n"); WCHAR *start = reader_get_cur(reader), *ptr;
WCHAR ch = 0;
int len;
/* skip '&' */
reader_skipn(reader, 1);
ptr = reader_get_cur(reader);
if (*ptr == '#')
{
encoded_buffer *buffer = &reader->input->buffer->utf16;
reader_skipn(reader, 1);
ptr = reader_get_cur(reader);
/* hex char or decimal */
if (*ptr == 'x')
{
reader_skipn(reader, 1);
ptr = reader_get_cur(reader);
while (*ptr != ';')
{
if ((*ptr >= '0' && *ptr <= '9'))
ch = ch*16 + *ptr - '0';
else if ((*ptr >= 'a' && *ptr <= 'f'))
ch = ch*16 + *ptr - 'a' + 10;
else if ((*ptr >= 'A' && *ptr <= 'F'))
ch = ch*16 + *ptr - 'A' + 10;
else
return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
reader_skipn(reader, 1);
ptr = reader_get_cur(reader);
}
}
else
{
while (*ptr != ';')
{
if ((*ptr >= '0' && *ptr <= '9'))
{
ch = ch*10 + *ptr - '0';
reader_skipn(reader, 1);
ptr = reader_get_cur(reader);
}
else
return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
}
}
if (!is_char(ch)) return WC_E_XMLCHARACTER;
/* normalize */
if (is_wchar_space(ch)) ch = ' ';
len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
memmove(start+1, ptr+1, len);
buffer->cur = (char*)start;
*start = ch;
return S_OK;
}
else
FIXME("Entity references not supported\n");
return E_NOTIMPL; return E_NOTIMPL;
} }
...@@ -1806,7 +1893,10 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value) ...@@ -1806,7 +1893,10 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
if (FAILED(hr)) return hr; if (FAILED(hr)) return hr;
} }
else else
{
reader_normalize_space(reader, ptr);
reader_skipn(reader, 1); reader_skipn(reader, 1);
}
ptr = reader_get_cur(reader); ptr = reader_get_cur(reader);
} }
...@@ -1848,7 +1938,7 @@ static HRESULT reader_parse_attribute(xmlreader *reader) ...@@ -1848,7 +1938,7 @@ static HRESULT reader_parse_attribute(xmlreader *reader)
hr = reader_parse_attvalue(reader, &value); hr = reader_parse_attvalue(reader, &value);
if (FAILED(hr)) return hr; if (FAILED(hr)) return hr;
TRACE("%s=\"%s\"\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len)); TRACE("%s=%s\n", debugstr_wn(local.str, local.len), debugstr_wn(value.str, value.len));
return reader_add_attr(reader, &local, &value); return reader_add_attr(reader, &local, &value);
} }
......
...@@ -1571,6 +1571,16 @@ static struct test_entry attributes_tests[] = { ...@@ -1571,6 +1571,16 @@ static struct test_entry attributes_tests[] = {
{ "<a attr1=\'a\"ttrvalue\'/>", "attr1", "a\"ttrvalue", S_OK }, { "<a attr1=\'a\"ttrvalue\'/>", "attr1", "a\"ttrvalue", S_OK },
{ "<a attr1=\' \'/>", "attr1", " ", S_OK }, { "<a attr1=\' \'/>", "attr1", " ", S_OK },
{ "<a attr1=\" \"/>", "attr1", " ", S_OK }, { "<a attr1=\" \"/>", "attr1", " ", S_OK },
{ "<a attr1=\"\r\n \r \n \t\n\r\"/>", "attr1", " ", S_OK },
{ "<a attr1=\" val \"/>", "attr1", " val ", S_OK },
{ "<a attr1=\"\r\n\tval\n\"/>", "attr1", " val ", S_OK },
{ "<a attr1=\"val&#32;\"/>", "attr1", "val ", S_OK },
{ "<a attr1=\"val&#x20;\"/>", "attr1", "val ", S_OK },
{ "<a attr1=\"val&#xfffe;\"/>", NULL, NULL, WC_E_XMLCHARACTER },
{ "<a attr1=\"val &#a;\"/>", NULL, NULL, WC_E_DIGIT, WC_E_SEMICOLON },
{ "<a attr1=\"val &#12a;\"/>", NULL, NULL, WC_E_SEMICOLON },
{ "<a attr1=\"val &#x12g;\"/>", NULL, NULL, WC_E_SEMICOLON },
{ "<a attr1=\"val &#xg;\"/>", NULL, NULL, WC_E_HEXDIGIT, WC_E_SEMICOLON },
{ "<a attr1=attrvalue/>", NULL, NULL, WC_E_QUOTE }, { "<a attr1=attrvalue/>", NULL, NULL, WC_E_QUOTE },
{ "<a attr1=\"attr<value\"/>", NULL, NULL, WC_E_LESSTHAN }, { "<a attr1=\"attr<value\"/>", NULL, NULL, WC_E_LESSTHAN },
{ NULL } { NULL }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment