blob: 4f4d2687f113060d1838ecb1b1803a27fb614b77 [file] [log] [blame]
/* Run the Expat test suite
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
| __// \| |_) | (_| | |_
\___/_/\_\ .__/ \__,_|\__|
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
Copyright (c) 2000-2017 Expat development team
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the
following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if defined(NDEBUG)
# undef NDEBUG /* because test suite relies on assert(...) at the moment */
#endif
#ifdef HAVE_EXPAT_CONFIG_H
# include <expat_config.h>
#endif
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h> /* ptrdiff_t */
#include <ctype.h>
#include <limits.h>
/* Fixed-width integer types: compilers that provide <stdint.h> use it;
 * the older Microsoft compilers selected below get hand-written typedefs.
 */
#if defined(_WIN32) && defined(_MSC_VER) && (_MSC_VER < 1600)
/* For vs2003/7.1 up to vs2008/9.0; _MSC_VER 1600 is vs2010/10.0 */
#if defined(_WIN64)
typedef __int64 intptr_t;
#else
typedef __int32 intptr_t;
#endif
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h> /* intptr_t uint64_t */
#endif
/* Boolean support for C builds: <stdbool.h> where it is available,
 * otherwise int-based stand-ins.  Nothing is defined for C++ builds.
 */
#if ! defined(__cplusplus)
# if defined(_MSC_VER) && (_MSC_VER <= 1700)
/* for vs2012/11.0/1700 and earlier Visual Studio compilers */
# define bool int
# define false 0
# define true 1
# else
# include <stdbool.h>
# endif
#endif
#include "expat.h"
#include "chardata.h"
#include "structdata.h"
#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
#include "memcheck.h"
#include "siphash.h"
#include "ascii.h" /* for ASCII_xxx */
/* printf length modifier used in this file (followed by "u") when
 * printing line and column numbers; it depends on XML_LARGE_SIZE.
 */
#ifdef XML_LARGE_SIZE
# define XML_FMT_INT_MOD "ll"
#else
# define XML_FMT_INT_MOD "l"
#endif
/* Character-type abstraction for the tests.
 *
 * XML_FMT_CHAR / XML_FMT_STR are printf conversions for one character /
 * a string, the xcstr*() macros map onto the wide or narrow C string
 * functions, and XCS() turns a string literal into a wide literal (via
 * the L prefix) or leaves it untouched.  Building with XML_UNICODE but
 * without XML_UNICODE_WCHAR_T is rejected at compile time.
 */
#ifdef XML_UNICODE_WCHAR_T
# define XML_FMT_CHAR "lc"
# define XML_FMT_STR "ls"
# include <wchar.h>
# define xcstrlen(s) wcslen(s)
# define xcstrcmp(s, t) wcscmp((s), (t))
# define xcstrncmp(s, t, n) wcsncmp((s), (t), (n))
/* Two-level expansion so that a macro argument is expanded before the
 * L prefix is pasted on. */
# define XCS(s) _XCS(s)
# define _XCS(s) L ## s
#else
# ifdef XML_UNICODE
# error "No support for UTF-16 character without wchar_t in tests"
# else
# define XML_FMT_CHAR "c"
# define XML_FMT_STR "s"
# define xcstrlen(s) strlen(s)
# define xcstrcmp(s, t) strcmp((s), (t))
# define xcstrncmp(s, t, n) strncmp((s), (t), (n))
# define XCS(s) s
# endif /* XML_UNICODE */
#endif /* XML_UNICODE_WCHAR_T */
/* Parser shared by the tests in this file: created by basic_setup() and
 * released (and reset to NULL) by basic_teardown().
 */
static XML_Parser parser = NULL;
/* Fixture set-up: create the shared parser (NULL is passed as the
 * encoding argument) and fail the test if no parser could be created.
 */
static void
basic_setup(void)
{
    parser = XML_ParserCreate(NULL);
    if (!parser) {
        fail("Parser not created.");
    }
}
/* Fixture tear-down: free the shared parser, if there is one, and clear
 * the global so a later call is a no-op.
 */
static void
basic_teardown(void)
{
    if (parser == NULL) {
        return;
    }
    XML_ParserFree(parser);
    parser = NULL;
}
/* Generate a failure using the parser state to create an error message;
   this should be used when the parser reports an error we weren't
   expecting.

   parser: parser whose error code and current position are reported
   file:   source file of the failing check (normally __FILE__)
   line:   source line of the failing check (normally __LINE__)

   The message is formatted into a fixed-size stack buffer, so the two
   variable-length pieces are capped: the error string by a printf
   precision and the file name by showing only its tail.  snprintf() is
   deliberately not used because this file still supports compilers
   that lack it.
*/
static void
_xml_failure(XML_Parser parser, const char *file, int line)
{
    /* Worst case: under 110 bytes of fixed text and numbers, plus at
       most 256 bytes of error string and MAX_FILE_CHARS of file name,
       which fits comfortably in 1024. */
    enum { MAX_FILE_CHARS = 512 };
    char buffer[1024];
    const size_t file_len = strlen(file);
    const char *file_tail = file;
    enum XML_Error err = XML_GetErrorCode(parser);

    /* Keep the end of an over-long path: that is where the file name is */
    if (file_len > MAX_FILE_CHARS)
        file_tail = file + (file_len - MAX_FILE_CHARS);

    sprintf(buffer,
            " %d: %.256" XML_FMT_STR " (line %"
                XML_FMT_INT_MOD "u, offset %"
                XML_FMT_INT_MOD "u)\n reported from %s, line %d\n",
            (int)err,
            XML_ErrorString(err),
            XML_GetCurrentLineNumber(parser),
            XML_GetCurrentColumnNumber(parser),
            file_tail, line);
    /* The untruncated file name is still what gets reported as location */
    _fail_unless(0, file, line, buffer);
}
/* Feed a buffer to the parser one byte at a time.
 *
 * parser:  parser to drive
 * s, len:  input bytes and their count
 * isFinal: non-zero if this buffer ends the document; only the last
 *          single-byte call is then marked final
 *
 * An empty buffer is passed straight through to XML_Parse().  Returns
 * the status of the first call that did not return XML_STATUS_OK, or
 * the status of the last call.  A negative len makes no parser call
 * and returns XML_STATUS_ERROR.
 */
static enum XML_Status
_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
{
    enum XML_Status status = XML_STATUS_ERROR;
    int pos;

    if (len == 0)
        return XML_Parse(parser, s, len, isFinal);

    for (pos = 0; pos < len; pos++) {
        /* Hand over a private copy of the byte, to help
           out-of-bounds detection */
        const char byte = s[pos];
        const int lastCall = isFinal && (pos == len - 1);

        status = XML_Parse(parser, &byte, 1, lastCall);
        if (status != XML_STATUS_OK)
            break;
    }
    return status;
}
/* Report an unexpected parser error, tagged with the caller's location */
#define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
static void
_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
const char *file, int lineno)
{
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK)
/* Hackish use of _fail_unless() macro, but let's us report
the right filename and line number. */
_fail_unless(0, file, lineno, errorMessage);
if (XML_GetErrorCode(parser) != errorCode)
_xml_failure(parser, file, lineno);
}
/* Check that text fails to parse with errorCode, attributing any test
 * failure to the line where this macro is used. */
#define expect_failure(text, errorCode, errorMessage) \
_expect_failure((text), (errorCode), (errorMessage), \
__FILE__, __LINE__)
/* Dummy handlers for when we need to set a handler to tickle a bug,
but it doesn't need to do anything.
*/
/* Bit mask recording which dummy handlers have been called: most of the
 * handlers below OR their own DUMMY_*_FLAG bit into this variable.
 */
static unsigned long dummy_handler_flags = 0;
/* One bit per handler kind */
#define DUMMY_START_DOCTYPE_HANDLER_FLAG (1UL << 0)
#define DUMMY_END_DOCTYPE_HANDLER_FLAG (1UL << 1)
#define DUMMY_ENTITY_DECL_HANDLER_FLAG (1UL << 2)
#define DUMMY_NOTATION_DECL_HANDLER_FLAG (1UL << 3)
#define DUMMY_ELEMENT_DECL_HANDLER_FLAG (1UL << 4)
#define DUMMY_ATTLIST_DECL_HANDLER_FLAG (1UL << 5)
#define DUMMY_COMMENT_HANDLER_FLAG (1UL << 6)
#define DUMMY_PI_HANDLER_FLAG (1UL << 7)
#define DUMMY_START_ELEMENT_HANDLER_FLAG (1UL << 8)
#define DUMMY_START_CDATA_HANDLER_FLAG (1UL << 9)
#define DUMMY_END_CDATA_HANDLER_FLAG (1UL << 10)
#define DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG (1UL << 11)
#define DUMMY_START_NS_DECL_HANDLER_FLAG (1UL << 12)
#define DUMMY_END_NS_DECL_HANDLER_FLAG (1UL << 13)
#define DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG (1UL << 14)
#define DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG (1UL << 15)
#define DUMMY_SKIP_HANDLER_FLAG (1UL << 16)
#define DUMMY_DEFAULT_HANDLER_FLAG (1UL << 17)
/* XML declaration handler that ignores its arguments and does nothing */
static void XMLCALL
dummy_xdecl_handler(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(version),
                    const XML_Char *UNUSED_P(encoding),
                    int UNUSED_P(standalone))
{
}
/* Doctype-start handler: ignores its arguments and only records the
 * call by setting DUMMY_START_DOCTYPE_HANDLER_FLAG.
 */
static void XMLCALL
dummy_start_doctype_handler(void *UNUSED_P(userData),
                            const XML_Char *UNUSED_P(doctypeName),
                            const XML_Char *UNUSED_P(sysid),
                            const XML_Char *UNUSED_P(pubid),
                            int UNUSED_P(has_internal_subset))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_DOCTYPE_HANDLER_FLAG;
}
/* Doctype-end handler: only records the call by setting
 * DUMMY_END_DOCTYPE_HANDLER_FLAG.
 */
static void XMLCALL
dummy_end_doctype_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_DOCTYPE_HANDLER_FLAG;
}
/* Entity declaration handler: ignores every argument and only records
 * the call by setting DUMMY_ENTITY_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_entity_decl_handler(void *UNUSED_P(userData),
                          const XML_Char *UNUSED_P(entityName),
                          int UNUSED_P(is_parameter_entity),
                          const XML_Char *UNUSED_P(value),
                          int UNUSED_P(value_length),
                          const XML_Char *UNUSED_P(base),
                          const XML_Char *UNUSED_P(systemId),
                          const XML_Char *UNUSED_P(publicId),
                          const XML_Char *UNUSED_P(notationName))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_ENTITY_DECL_HANDLER_FLAG;
}
/* Notation declaration handler: ignores its arguments and only records
 * the call by setting DUMMY_NOTATION_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_notation_decl_handler(void *UNUSED_P(userData),
                            const XML_Char *UNUSED_P(notationName),
                            const XML_Char *UNUSED_P(base),
                            const XML_Char *UNUSED_P(systemId),
                            const XML_Char *UNUSED_P(publicId))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_NOTATION_DECL_HANDLER_FLAG;
}
/* Element declaration handler: releases the content model it is given
 * and records the call by setting DUMMY_ELEMENT_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_element_decl_handler(void *UNUSED_P(userData),
                           const XML_Char *UNUSED_P(name),
                           XML_Content *model)
{
    /* Freeing the content model is the handler's job.  The global
     * parser is used for that: userData cannot carry the parser,
     * because this handler is combined with other handlers that need
     * userData for something else.
     */
    XML_FreeContentModel(parser, model);
    dummy_handler_flags = dummy_handler_flags | DUMMY_ELEMENT_DECL_HANDLER_FLAG;
}
/* Attribute-list declaration handler: ignores its arguments and only
 * records the call by setting DUMMY_ATTLIST_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_attlist_decl_handler(void *UNUSED_P(userData),
                           const XML_Char *UNUSED_P(elname),
                           const XML_Char *UNUSED_P(attname),
                           const XML_Char *UNUSED_P(att_type),
                           const XML_Char *UNUSED_P(dflt),
                           int UNUSED_P(isrequired))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_ATTLIST_DECL_HANDLER_FLAG;
}
/* Comment handler: only records the call by setting
 * DUMMY_COMMENT_HANDLER_FLAG.
 */
static void XMLCALL
dummy_comment_handler(void *UNUSED_P(userData),
                      const XML_Char *UNUSED_P(data))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_COMMENT_HANDLER_FLAG;
}
/* Processing-instruction handler: only records the call by setting
 * DUMMY_PI_HANDLER_FLAG.
 */
static void XMLCALL
dummy_pi_handler(void *UNUSED_P(userData),
                 const XML_Char *UNUSED_P(target),
                 const XML_Char *UNUSED_P(data))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_PI_HANDLER_FLAG;
}
/* Start-element handler: only records the call by setting
 * DUMMY_START_ELEMENT_HANDLER_FLAG.
 */
static void XMLCALL
dummy_start_element(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(name),
                    const XML_Char **UNUSED_P(atts))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_ELEMENT_HANDLER_FLAG;
}
/* End-element handler that does nothing (no flag is recorded) */
static void XMLCALL
dummy_end_element(void *UNUSED_P(userData),
                  const XML_Char *UNUSED_P(name))
{
}
/* CDATA-section-start handler: only records the call by setting
 * DUMMY_START_CDATA_HANDLER_FLAG.
 */
static void XMLCALL
dummy_start_cdata_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_CDATA_HANDLER_FLAG;
}
/* CDATA-section-end handler: only records the call by setting
 * DUMMY_END_CDATA_HANDLER_FLAG.
 */
static void XMLCALL
dummy_end_cdata_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_CDATA_HANDLER_FLAG;
}
/* Character data handler that does nothing (no flag is recorded) */
static void XMLCALL
dummy_cdata_handler(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(s),
                    int UNUSED_P(len))
{
}
/* Namespace-declaration-start handler: only records the call by
 * setting DUMMY_START_NS_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_start_namespace_decl_handler(void *UNUSED_P(userData),
                                   const XML_Char *UNUSED_P(prefix),
                                   const XML_Char *UNUSED_P(uri))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_NS_DECL_HANDLER_FLAG;
}
/* Namespace-declaration-end handler: only records the call by setting
 * DUMMY_END_NS_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_end_namespace_decl_handler(void *UNUSED_P(userData),
                                 const XML_Char *UNUSED_P(prefix))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_NS_DECL_HANDLER_FLAG;
}
/* Unparsed entity declaration handler: only records the call by
 * setting DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG.
 *
 * This kind of handler is obsolete, but as long as the library code
 * for it exists the tests should keep covering it.
 */
static void XMLCALL
dummy_unparsed_entity_decl_handler(void *UNUSED_P(userData),
                                   const XML_Char *UNUSED_P(entityName),
                                   const XML_Char *UNUSED_P(base),
                                   const XML_Char *UNUSED_P(systemId),
                                   const XML_Char *UNUSED_P(publicId),
                                   const XML_Char *UNUSED_P(notationName))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG;
}
/* Default handler that does nothing (no flag is recorded) */
static void XMLCALL
dummy_default_handler(void *UNUSED_P(userData),
                      const XML_Char *UNUSED_P(s),
                      int UNUSED_P(len))
{
}
/* Doctype-declaration-start handler: ignores its arguments and only
 * records the call by setting DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_start_doctype_decl_handler(void *UNUSED_P(userData),
                                 const XML_Char *UNUSED_P(doctypeName),
                                 const XML_Char *UNUSED_P(sysid),
                                 const XML_Char *UNUSED_P(pubid),
                                 int UNUSED_P(has_internal_subset))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG;
}
/* Doctype-declaration-end handler: only records the call by setting
 * DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_end_doctype_decl_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG;
}
/* Skipped-entity handler: only records the call by setting
 * DUMMY_SKIP_HANDLER_FLAG.
 */
static void XMLCALL
dummy_skip_handler(void *UNUSED_P(userData),
                   const XML_Char *UNUSED_P(entityName),
                   int UNUSED_P(is_parameter_entity))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_SKIP_HANDLER_FLAG;
}
/* Useful external entity handler */
/* One entry of the table consulted by external_entity_optioner(): the
 * text to parse when an external entity with this system ID is
 * requested.  The table ends at the first entry whose parse_text is
 * NULL.
 */
typedef struct ExtOption {
const XML_Char *system_id;
const char *parse_text;
} ExtOption;
/* External entity reference handler driven by a table of ExtOption
 * entries stored as the parser's user data.
 *
 * The table is searched for an entry whose system_id equals systemId.
 * The matching entry's parse_text is parsed byte by byte, as a complete
 * document, in a freshly created external entity parser; the status of
 * that parse is returned.  Returns XML_STATUS_ERROR if the sub-parser
 * cannot be created; if no entry matches, the test is failed.
 */
static int XMLCALL
external_entity_optioner(XML_Parser parser,
                         const XML_Char *context,
                         const XML_Char *UNUSED_P(base),
                         const XML_Char *systemId,
                         const XML_Char *UNUSED_P(publicId))
{
    ExtOption *candidate;

    for (candidate = (ExtOption *)XML_GetUserData(parser);
         candidate->parse_text != NULL;
         candidate++) {
        XML_Parser ext_parser;
        enum XML_Status rc;

        if (xcstrcmp(systemId, candidate->system_id) != 0)
            continue;

        ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
        if (ext_parser == NULL)
            return XML_STATUS_ERROR;
        rc = _XML_Parse_SINGLE_BYTES(ext_parser, candidate->parse_text,
                                     (int)strlen(candidate->parse_text),
                                     XML_TRUE);
        XML_ParserFree(ext_parser);
        return rc;
    }
    fail("No suitable option found");
    return XML_STATUS_ERROR;
}
/*
* Parameter entity evaluation support.
*/
/* Outcomes recorded in entity_match_flag by param_entity_match_handler() */
#define ENTITY_MATCH_FAIL (-1)
#define ENTITY_MATCH_NOT_FOUND (0)
#define ENTITY_MATCH_SUCCESS (1)
/* Name and expected value of the parameter entity being looked for;
 * while either is NULL the handler does nothing. */
static const XML_Char *entity_name_to_match = NULL;
static const XML_Char *entity_value_to_match = NULL;
/* Stays ENTITY_MATCH_NOT_FOUND until the handler sees the named entity */
static int entity_match_flag = ENTITY_MATCH_NOT_FOUND;
/* Entity declaration handler that checks one parameter entity's value.
 *
 * Declarations of general entities are ignored, as is every call made
 * while entity_name_to_match or entity_value_to_match is NULL.  When a
 * parameter entity named entity_name_to_match is declared,
 * entity_match_flag becomes ENTITY_MATCH_SUCCESS if its value equals
 * entity_value_to_match (same length, same characters) and
 * ENTITY_MATCH_FAIL otherwise.  Other entities leave the flag alone.
 */
static void XMLCALL
param_entity_match_handler(void *UNUSED_P(userData),
                           const XML_Char *entityName,
                           int is_parameter_entity,
                           const XML_Char *value,
                           int value_length,
                           const XML_Char *UNUSED_P(base),
                           const XML_Char *UNUSED_P(systemId),
                           const XML_Char *UNUSED_P(publicId),
                           const XML_Char *UNUSED_P(notationName))
{
    int same_value;

    if (!is_parameter_entity)
        return;
    if (entity_name_to_match == NULL || entity_value_to_match == NULL)
        return;
    if (xcstrcmp(entityName, entity_name_to_match) != 0)
        return; /* some other entity: leave the match flag alone */

    /* The cast is safe: the test strings are under our control and are
     * never long enough to overflow an int.
     */
    same_value = (value_length == (int)xcstrlen(entity_value_to_match))
                 && (xcstrncmp(value, entity_value_to_match, value_length) == 0);
    entity_match_flag = same_value ? ENTITY_MATCH_SUCCESS : ENTITY_MATCH_FAIL;
}
/*
* Character & encoding tests.
*/
START_TEST(test_nul_byte)
{
char text[] = "<doc>\0</doc>";
/* test that a NUL byte (in US-ASCII data) is an error */
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
fail("Parser did not report error on NUL-byte.");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
}
END_TEST
/* A character reference to U+0000 must be rejected.  Unlike
 * test_nul_byte, the input holds no NUL byte: the NUL is spelled as the
 * numeric character reference "&#0;", so the expected error is
 * XML_ERROR_BAD_CHAR_REF.
 */
START_TEST(test_u0000_char)
{
    /* test that a character reference to U+0000 is an error */
    expect_failure("<doc>&#0;</doc>",
                   XML_ERROR_BAD_CHAR_REF,
                   "Parser did not report error on NUL character reference.");
}
END_TEST
/* Run the SipHash implementation's built-in self-test */
START_TEST(test_siphash_self)
{
    const int self_test_passed = sip24_valid();

    if (!self_test_passed) {
        fail("SipHash self-test failed");
    }
}
END_TEST
START_TEST(test_siphash_spec)
{
/* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
"\x0a\x0b\x0c\x0d\x0e";
const size_t len = sizeof(message) - 1;
const uint64_t expected = _SIP_ULL(0xa129ca61U, 0x49be45e5U);
struct siphash state;
struct sipkey key;
sip_tokey(&key,
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
"\x0a\x0b\x0c\x0d\x0e\x0f");
sip24_init(&state, &key);
/* Cover spread across calls */
sip24_update(&state, message, 4);
sip24_update(&state, message + 4, len - 4);
/* Cover null length */
sip24_update(&state, message, 0);
if (sip24_final(&state) != expected)
fail("sip24_final failed spec test\n");
/* Cover wrapper */
if (siphash24(message, len, &key) != expected)
fail("siphash24 failed spec test\n");
}
END_TEST
START_TEST(test_bom_utf8)
{
/* This test is really just making sure we don't core on a UTF-8 BOM. */
const char *text = "\357\273\277<e/>";
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_bom_utf16_be)
{
char text[] = "\376\377\0<\0e\0/\0>";
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_bom_utf16_le)
{
char text[] = "\377\376<\0e\0/\0>\0";
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Parse whole buffer at once to exercise a different code path */
START_TEST(test_nobom_utf16_le)
{
char text[] = " \0<\0e\0/\0>\0";
if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Character data handler: append the reported characters to the
 * CharData object passed as user data.
 */
static void XMLCALL
accumulate_characters(void *userData, const XML_Char *s, int len)
{
    CharData *storage = (CharData *)userData;

    CharData_AppendXMLChars(storage, s, len);
}
/* Start-element handler: store an attribute value in the CharData
 * object passed as user data.
 *
 * Values are appended only while storage->count is negative, so
 * elements without attributes leave the storage untouched.
 * NOTE(review): presumably count is negative only until the first
 * append, making this "the value of the first attribute seen" --
 * confirm against chardata.c.
 */
static void XMLCALL
accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
                     const XML_Char **atts)
{
    CharData *storage = (CharData *)userData;
    const XML_Char **pair;

    /* Nothing to do without an attribute array */
    if (atts == NULL)
        return;

    /* atts holds name/value pairs and ends with a NULL name */
    for (pair = atts; storage->count < 0 && pair[0] != NULL; pair += 2) {
        /* "accumulate" the value of the first attribute we see */
        CharData_AppendXMLChars(storage, pair[1], -1);
    }
}
static void
_run_character_check(const char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
/* _run_character_check() tagged with the caller's file and line */
#define run_character_check(text, expected) \
_run_character_check(text, expected, __FILE__, __LINE__)
static void
_run_attribute_check(const char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, accumulate_attribute);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
/* _run_attribute_check() tagged with the caller's file and line */
#define run_attribute_check(text, expected) \
_run_attribute_check(text, expected, __FILE__, __LINE__)
/* User data for tests that involve an external entity.  In the code
 * visible here only the storage member is used: it is set by
 * _run_ext_character_check() and read by ext_accumulate_characters().
 * NOTE(review): parse_text and encoding are presumably consumed by
 * external entity handlers defined later in the file -- confirm.
 */
typedef struct ExtTest {
const char *parse_text;
const XML_Char *encoding;
CharData *storage;
} ExtTest;
/* Character data handler for parsers whose user data is an ExtTest:
 * append the reported characters to the ExtTest's storage.
 */
static void XMLCALL
ext_accumulate_characters(void *userData, const XML_Char *s, int len)
{
    CharData *storage = ((ExtTest *)userData)->storage;

    accumulate_characters(storage, s, len);
}
/* Like _run_character_check(), but the parser's user data is the
 * caller's ExtTest structure; the collected character data is attached
 * to it through its storage member.
 *
 * file, line: location to attribute a parse failure to
 */
static void
_run_ext_character_check(const char *text,
                         ExtTest *test_data,
                         const XML_Char *expected,
                         const char *file, int line)
{
    CharData collected;
    enum XML_Status status;

    CharData_Init(&collected);
    test_data->storage = &collected;
    XML_SetUserData(parser, test_data);
    XML_SetCharacterDataHandler(parser, ext_accumulate_characters);

    status = _XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE);
    if (status == XML_STATUS_ERROR) {
        _xml_failure(parser, file, line);
    }
    CharData_CheckXMLChars(&collected, expected);
}
/* _run_ext_character_check() tagged with the caller's file and line */
#define run_ext_character_check(text, test_data, expected) \
_run_ext_character_check(text, test_data, expected, __FILE__, __LINE__)
/* Regression test for SF bug #491986: Danish letters in an ISO-8859-1
 * document must arrive as the corresponding characters in the
 * parser's output encoding.
 */
START_TEST(test_danish_latin1)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        /* wide characters */
        XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
#else
        /* UTF-8 */
        XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
#endif

    run_character_check(document, expected);
}
END_TEST
/* Regression test for SF bug #514281: accented French letters written
 * as hexadecimal character references in an ISO-8859-1 document.
 */
START_TEST(test_french_charref_hexidecimal)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        /* wide characters */
        XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
        /* UTF-8 */
        XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

    run_character_check(document, expected);
}
END_TEST
/* Accented French letters written as decimal character references in
 * an ISO-8859-1 document.
 */
START_TEST(test_french_charref_decimal)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        /* wide characters */
        XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
        /* UTF-8 */
        XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

    run_character_check(document, expected);
}
END_TEST
/* Accented French letters given as raw ISO-8859-1 bytes */
START_TEST(test_french_latin1)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        /* wide characters */
        XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
#else
        /* UTF-8 */
        XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
#endif

    run_character_check(document, expected);
}
END_TEST
/* One accented letter given as a two-byte UTF-8 sequence */
START_TEST(test_french_utf8)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-8'?>\n"
        "<doc>\xC3\xA9</doc>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        XCS("\x00e9");
#else
        XCS("\xC3\xA9");
#endif

    run_character_check(document, expected);
}
END_TEST
/* Regression test for SF bug #600479: the three-byte UTF-8 sequence
   EF BA BF must be accepted as character data.

   XXX There should be a test that exercises all legal XML Unicode
   characters as PCDATA and attribute value content, and XML Name
   characters as part of element and attribute names.
*/
START_TEST(test_utf8_false_rejection)
{
    const char *document = "<doc>\xEF\xBA\xBF</doc>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        XCS("\xfebf");
#else
        XCS("\xEF\xBA\xBF");
#endif

    run_character_check(document, expected);
}
END_TEST
/* Regression test for SF bug #477667.
This test assures that any 8-bit character followed by a 7-bit
character will not be mistakenly interpreted as a valid UTF-8
sequence.
*/
START_TEST(test_illegal_utf8)
{
char text[100];
int i;
for (i = 128; i <= 255; ++i) {
sprintf(text, "<e>%ccd</e>", i);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_OK) {
sprintf(text,
"expected token error for '%c' (ordinal %d) in UTF-8 text",
i, i);
fail(text);
}
else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
/* Reset the parser since we use the same parser repeatedly. */
XML_ParserReset(parser, NULL);
}
}
END_TEST
/* Examples, not masks: */
/* One sample byte of each kind, as string literals so that
 * test_utf8_auto_align can concatenate them into test inputs.
 */
#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
/* Table-driven check of _INTERNAL_trim_to_complete_utf8_characters():
 * for each input, the end pointer must move back by exactly the number
 * of bytes that belong to a trailing incomplete UTF-8 sequence, and not
 * at all when the input ends on a complete character.  Every
 * mismatching case is printed before the test is failed.
 */
START_TEST(test_utf8_auto_align)
{
    struct TestCase {
        ptrdiff_t expectedMovementInChars;
        const char * input;
    };
    struct TestCase cases[] = {
        { 0, ""},
        { 0, UTF8_LEAD_1},
        {-1, UTF8_LEAD_2},
        { 0, UTF8_LEAD_2 UTF8_FOLLOW},
        {-1, UTF8_LEAD_3},
        {-2, UTF8_LEAD_3 UTF8_FOLLOW},
        { 0, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
        {-1, UTF8_LEAD_4},
        {-2, UTF8_LEAD_4 UTF8_FOLLOW},
        {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
        { 0, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
    };
    const size_t case_count = sizeof(cases) / sizeof(cases[0]);
    size_t idx;
    bool all_passed = true;

    for (idx = 0; idx < case_count; idx++) {
        const char * const input = cases[idx].input;
        const size_t input_len = strlen(input);
        const char * const end_before = input + input_len;
        const char * end_after = end_before;
        ptrdiff_t moved;
        size_t k;

        _INTERNAL_trim_to_complete_utf8_characters(input, &end_after);
        moved = end_after - end_before;
        if (moved == cases[idx].expectedMovementInChars)
            continue;

        /* Report the case number (1-based) and dump the input bytes */
        all_passed = false;
        printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
               ", actually moved by %2d chars: \"",
               (unsigned)(idx + 1),
               (int)cases[idx].expectedMovementInChars,
               (int)moved);
        for (k = 0; k < input_len; k++) {
            printf("\\x%02x", (unsigned char)input[k]);
        }
        printf("\"\n");
    }

    if (! all_passed) {
        fail("UTF-8 auto-alignment is not bullet-proof\n");
    }
}
END_TEST
START_TEST(test_utf16)
{
/* <?xml version="1.0" encoding="UTF-16"?>
* <doc a='123'>some {A} text</doc>
*
* where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
*/
char text[] =
"\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
"\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
"\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
"\000'\000?\000>\000\n"
"\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
"\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
"<\000/\000d\000o\000c\000>";
#ifdef XML_UNICODE
const XML_Char *expected = XCS("some \xff21 text");
#else
const XML_Char *expected = XCS("some \357\274\241 text");
#endif
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
START_TEST(test_utf16_le_epilog_newline)
{
unsigned int first_chunk_bytes = 17;
char text[] =
"\xFF\xFE" /* BOM */
"<\000e\000/\000>\000" /* document element */
"\r\000\n\000\r\000\n\000"; /* epilog */
if (first_chunk_bytes >= sizeof(text) - 1)
fail("bad value of first_chunk_bytes");
if ( _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(parser);
else {
enum XML_Status rc;
rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
if (rc == XML_STATUS_ERROR)
xml_failure(parser);
}
}
END_TEST
/* A document that declares itself UTF-16 while its bytes are plainly
 * not UTF-16 must be faulted with XML_ERROR_INCORRECT_ENCODING.
 */
START_TEST(test_not_utf16)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-16'?>"
        "<doc>Hi</doc>";

    /* An XML declaration handler is installed only to provoke the
       appropriate code paths */
    XML_SetXmlDeclHandler(parser, dummy_xdecl_handler);
    expect_failure(document,
                   XML_ERROR_INCORRECT_ENCODING,
                   "UTF-16 declared in UTF-8 not faulted");
}
END_TEST
/* Setting an encoding name the parser does not know succeeds at the
 * time it is set, but parsing must then fail with
 * XML_ERROR_UNKNOWN_ENCODING.
 */
START_TEST(test_bad_encoding)
{
    const char *document = "<doc>Hi</doc>";

    if (!XML_SetEncoding(parser, XCS("unknown-encoding"))) {
        fail("XML_SetEncoding failed");
    }
    expect_failure(document,
                   XML_ERROR_UNKNOWN_ENCODING,
                   "Unknown encoding not faulted");
}
END_TEST
/* Regression test for SF bug #481609, #774028.
 *
 * German umlauts given as raw ISO-8859-1 bytes and as decimal and
 * hexadecimal character references, both in an attribute value and in
 * element content.  Each form is checked through the character data
 * handler and through the attribute handler, first without and then
 * with a default handler installed.
 */
START_TEST(test_latin1_umlauts)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
        " >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
    const XML_Char *expected =
#ifdef XML_UNICODE
        /* Expected results in UTF-16 */
        XCS("\x00e4 \x00f6 \x00fc ")
        XCS("\x00e4 \x00f6 \x00fc ")
        XCS("\x00e4 \x00f6 \x00fc >");
#else
        /* Expected results in UTF-8 */
        XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
        XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
        XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
#endif

    /* Element content, then the attribute value */
    run_character_check(document, expected);
    XML_ParserReset(parser, NULL);
    run_attribute_check(document, expected);

    /* The same two checks again, with a default handler set */
    XML_ParserReset(parser, NULL);
    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_character_check(document, expected);
    XML_ParserReset(parser, NULL);
    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_attribute_check(document, expected);
}
END_TEST
/* An element name that contains a 4-byte UTF-8 character must be
 * rejected as an invalid token.
 */
START_TEST(test_long_utf8_character)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-8'?>\n"
        /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
        "<do\xf0\x90\x80\x80/>";

    expect_failure(document,
                   XML_ERROR_INVALID_TOKEN,
                   "4-byte UTF-8 character in element name not faulted");
}
END_TEST
/* Test that a long latin-1 attribute (too long to convert in one go)
* is correctly converted
*/
START_TEST(test_long_latin1_attribute)
{
/* The attribute value is 1023 ASCII letters (15 lines of 64 plus one
 * line of 63) followed by a single non-ASCII Latin-1 character.
 */
const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc att='"
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
/* Last character splits across a buffer boundary */
"\xe4'>\n</doc>";
/* Expected attribute value: the same letters, with the final character
 * as a wide character or as its two-byte UTF-8 form.
 */
#ifdef XML_UNICODE
const XML_Char *expected =
/* 64 characters per line */
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
XCS("\x00e4");
#else
const XML_Char *expected =
/* 64 characters per line */
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
XCS("\xc3\xa4");
#endif
run_attribute_check(text, expected);
}
END_TEST
/* Test that a long ASCII attribute (too long to convert in one go)
* is correctly converted
*/
START_TEST(test_long_ascii_attribute)
{
/* The attribute value is 1024 letters (16 lines of 64) followed by
 * five digits.
 */
const char *text =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<doc att='"
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"01234'>\n</doc>";
/* Pure ASCII, so a single expected value serves every build (XCS()
 * supplies the wide form where needed).
 */
const XML_Char *expected =
/* 64 characters per line */
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
XCS("01234");
run_attribute_check(text, expected);
}
END_TEST
/* Regression test #1 for SF bug #653180.
   After a non-final parse call has consumed a document spread over four
   lines, XML_GetCurrentLineNumber() must report line 4. */
START_TEST(test_line_number_after_parse)
{
    const char *text =
        "<tag>\n"
        "\n"
        "\n</tag>";
    const int text_len = (int)strlen(text);
    XML_Size lineno;

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_FALSE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }

    /* Three newlines precede the closing tag, hence line 4. */
    lineno = XML_GetCurrentLineNumber(parser);
    if (lineno != 4) {
        char buffer[100];

        sprintf(buffer,
                "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
        fail(buffer);
    }
}
END_TEST
/* Regression test #2 for SF bug #653180.
   After a non-final parse call has consumed the 11-character document
   "<tag></tag>", XML_GetCurrentColumnNumber() must report column 11. */
START_TEST(test_column_number_after_parse)
{
    const char *text = "<tag></tag>";
    const int text_len = (int)strlen(text);
    XML_Size colno;

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_FALSE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }

    colno = XML_GetCurrentColumnNumber(parser);
    if (colno != 11) {
        char buffer[100];

        sprintf(buffer,
                "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
        fail(buffer);
    }
}
END_TEST
/* Tag-kind markers: passed as the last argument of StructData_AddItem()
   by the element handlers below, and used as the last field of the
   StructDataEntry rows a test expects. */
#define STRUCT_START_TAG 0
#define STRUCT_END_TAG 1
/* Start-element handler that logs where each opening tag was seen.
   userData must point to a StructData; the element name is appended to
   it together with the current column and line number of the global
   parser, tagged as STRUCT_START_TAG.  Attributes are ignored. */
static void XMLCALL
start_element_event_handler2(void *userData, const XML_Char *name,
                             const XML_Char **UNUSED_P(attr))
{
    StructData_AddItem((StructData *)userData,
                       name,
                       XML_GetCurrentColumnNumber(parser),
                       XML_GetCurrentLineNumber(parser),
                       STRUCT_START_TAG);
}
/* End-element handler that logs where each closing tag was seen.
   userData must point to a StructData; the element name is appended to
   it together with the current column and line number of the global
   parser, tagged as STRUCT_END_TAG. */
static void XMLCALL
end_element_event_handler2(void *userData, const XML_Char *name)
{
    StructData_AddItem((StructData *)userData,
                       name,
                       XML_GetCurrentColumnNumber(parser),
                       XML_GetCurrentLineNumber(parser),
                       STRUCT_END_TAG);
}
/* Regression test #3 for SF bug #653180.
   Records the column and line number that the start and end element
   handlers observe for every tag and compares them against a fixed
   table.  The document mixes Unix, Windows and old Mac OS line endings.

   The indentation inside the document text is significant: each tag
   must start at exactly the column listed for it in expected[] (two
   spaces for <b>/<d>, four for <c/> and <f/>).  Each row of expected[]
   is { name, column, line, tag kind }. */
START_TEST(test_line_and_column_numbers_inside_handlers)
{
    const char *text =
        "<a>\n"                     /* Unix end-of-line */
        "  <b>\r\n"                 /* Windows end-of-line */
        "    <c/>\r"                /* Mac OS end-of-line */
        "  </b>\n"
        "  <d>\n"
        "    <f/>\n"
        "  </d>\n"
        "</a>";
    const StructDataEntry expected[] = {
        { XCS("a"), 0, 1, STRUCT_START_TAG },
        { XCS("b"), 2, 2, STRUCT_START_TAG },
        { XCS("c"), 4, 3, STRUCT_START_TAG },
        /* The end event of an empty element is listed at the column just
           past its tag: 4 + strlen("<c/>") == 8. */
        { XCS("c"), 8, 3, STRUCT_END_TAG },
        { XCS("b"), 2, 4, STRUCT_END_TAG },
        { XCS("d"), 2, 5, STRUCT_START_TAG },
        { XCS("f"), 4, 6, STRUCT_START_TAG },
        { XCS("f"), 8, 6, STRUCT_END_TAG },
        { XCS("d"), 2, 7, STRUCT_END_TAG },
        { XCS("a"), 0, 8, STRUCT_END_TAG }
    };
    const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
    StructData storage;

    StructData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler2);
    XML_SetEndElementHandler(parser, end_element_event_handler2);
    if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
        == XML_STATUS_ERROR)
        xml_failure(parser);

    StructData_CheckItems(&storage, expected, expected_count);
    StructData_Dispose(&storage);
}
END_TEST
/* Regression test #4 for SF bug #653180.
   Parses a document whose <b> element is never closed and checks that,
   once the parse has failed, the reported line number is 3 (the line
   holding the mismatched </a>). */
START_TEST(test_line_number_after_error)
{
    const char *text =
        "<a>\n"
        " <b>\n"
        " </a>";  /* missing </b> */
    const int text_len = (int)strlen(text);
    XML_Size lineno;

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_FALSE)
        != XML_STATUS_ERROR) {
        fail("Expected a parse error");
    }

    lineno = XML_GetCurrentLineNumber(parser);
    if (lineno != 3) {
        char buffer[100];

        sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
        fail(buffer);
    }
}
END_TEST
/* Regression test #5 for SF bug #653180.
   Parses a document whose <b> element is never closed and checks the
   column number reported once the parse has failed.

   The two-space indent of the stray </a> is significant: the expected
   column 4 is the offset of the tag name, i.e. the indent plus the
   two characters of "</".
   NOTE(review): this assumes the tag mismatch is reported at the name
   following "</" -- confirm against xmlparse.c. */
START_TEST(test_column_number_after_error)
{
    const char *text =
        "<a>\n"
        "  <b>\n"
        "  </a>";  /* missing </b> */
    XML_Size colno;

    if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_FALSE)
        != XML_STATUS_ERROR)
        fail("Expected a parse error");

    colno = XML_GetCurrentColumnNumber(parser);
    if (colno != 4) {
        char buffer[100];

        sprintf(buffer,
                "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
        fail(buffer);
    }
}
END_TEST
/* Regression test for SF bug #478332.
   Feeds the parser a single input line of more than 1024 characters. */
START_TEST(test_really_long_lines)
{
    /* The line has to be longer than INIT_DATA_BUF_SIZE characters
       (stated to be 1024 in xmlparse.c).  The input is spelled out as
       one big literal -- 17 rows of 64 characters -- rather than built
       at run time, so that no buffer-filling code can introduce bugs
       of its own.
    */
    const char *text =
        "<e>"
        /* 64 chars */
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        /* until we have at least 1024 characters on the line: */
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "</e>";
    const int text_len = (int)strlen(text);

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)
{
    /* Same idea as test_really_long_lines, but the document declares a
     * non-trivial encoding (iso-8859-1) in order to provoke an output
     * buffer overflow.  That only happens when the whole cdata arrives
     * in one go, so the text is copied into the parser's own buffer and
     * parsed with XML_ParseBuffer() instead of byte-by-byte.
     */
    const char *text =
        "<?xml version='1.0' encoding='iso-8859-1'?>"
        "<e>"
        /* 64 chars */
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        /* until we have at least 1024 characters on the line: */
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
        "</e>";
    int parse_len = (int)strlen(text);
    void *parse_buf;

    /* Without a cdata handler the code path under test is not taken */
    XML_SetCharacterDataHandler(parser, dummy_cdata_handler);

    parse_buf = XML_GetBuffer(parser, parse_len);
    if (parse_buf == NULL) {
        fail("Could not allocate parse buffer");
    }
    memcpy(parse_buf, text, parse_len);

    if (XML_ParseBuffer(parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/*
* Element event tests.
*/
/* Start-element handler: appends the element name to the CharData
   passed as user data (length -1 is handed on to
   CharData_AppendXMLChars() unchanged).  Attributes are ignored. */
static void XMLCALL
start_element_event_handler(void *userData,
                            const XML_Char *name,
                            const XML_Char **UNUSED_P(atts))
{
    CharData *storage = (CharData *)userData;

    CharData_AppendXMLChars(storage, name, -1);
}
/* End-element handler: appends "/" followed by the element name to the
   CharData passed as user data, so a sequence of closing tags
   accumulates as e.g. "/c/b/a". */
static void XMLCALL
end_element_event_handler(void *userData, const XML_Char *name)
{
    CharData_AppendXMLChars((CharData *)userData, XCS("/"), 1);
    CharData_AppendXMLChars((CharData *)userData, name, -1);
}
/* Checks that end-element events arrive in document order: with only
   end_element_event_handler installed, the accumulated output must
   list every closing tag as "/name". */
START_TEST(test_end_element_events)
{
    const char *text = "<a><b><c/></b><d><f/></d></a>";
    const XML_Char *expected = XCS("/c/b/f/d/a");
    const int text_len = (int)strlen(text);
    CharData storage;

    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetEndElementHandler(parser, end_element_event_handler);

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
    CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/*
* Attribute tests.
*/
/* Helper for the attribute whitespace tests that follow.

   Returns 1 if the whitespace in string 's' has been normalized
   according to the rules for attribute value normalization, and 0
   otherwise.  (Section 3.3.3 of the recommendation.)

   - A tab, newline or carriage return anywhere in the string means the
     value is not normalized, whatever 'is_cdata' says.
   - If 'is_cdata' is zero, the value must additionally have no leading
     or trailing blanks and no run of more than one blank between other
     characters.  CDATA attributes are exempt from this collapsing, so
     any number of blanks is accepted when 'is_cdata' is non-zero.
*/
static int
is_whitespace_normalized(const XML_Char *s, int is_cdata)
{
    const XML_Char *p;
    int run = 0;        /* length of the run of blanks just scanned */
    int seen_text = 0;  /* non-zero once a non-blank has been scanned */

    for (p = s; *p; ++p) {
        if (*p == XCS(' ')) {
            ++run;
            continue;
        }
        if (*p == XCS('\t') || *p == XCS('\n') || *p == XCS('\r'))
            return 0;

        /* An ordinary character ends the current run of blanks: no
           blanks are tolerated before the first such character, and at
           most one between two of them. */
        if (!is_cdata && run > (seen_text ? 1 : 0))
            return 0;
        seen_text = 1;
        run = 0;
    }

    /* Whatever is left over in 'run' is trailing blanks. */
    return (run && !is_cdata) ? 0 : 1;
}
/* Check the attribute whitespace checker: a self-test asserting what
   is_whitespace_normalized() returns for representative strings, both
   as a tokenized (is_cdata == 0) and as a CDATA (is_cdata == 1) value.
   The exact number of blanks in each literal matters. */
static void
testhelper_is_whitespace_normalized(void)
{
    /* No blanks, or single interior blanks: always normalized */
    assert(is_whitespace_normalized(XCS("abc"), 0));
    assert(is_whitespace_normalized(XCS("abc"), 1));
    assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
    assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
    /* Leading blank: only acceptable for CDATA */
    assert(!is_whitespace_normalized(XCS(" abc def ghi"), 0));
    assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
    /* Two consecutive interior blanks: only acceptable for CDATA */
    assert(!is_whitespace_normalized(XCS("abc  def ghi"), 0));
    assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
    /* Trailing blank: only acceptable for CDATA */
    assert(!is_whitespace_normalized(XCS("abc def ghi "), 0));
    assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
    /* Nothing but a blank: only acceptable for CDATA */
    assert(!is_whitespace_normalized(XCS(" "), 0));
    assert(is_whitespace_normalized(XCS(" "), 1));
    /* Tab, newline and carriage return are never acceptable */
    assert(!is_whitespace_normalized(XCS("\t"), 0));
    assert(!is_whitespace_normalized(XCS("\t"), 1));
    assert(!is_whitespace_normalized(XCS("\n"), 0));
    assert(!is_whitespace_normalized(XCS("\n"), 1));
    assert(!is_whitespace_normalized(XCS("\r"), 0));
    assert(!is_whitespace_normalized(XCS("\r"), 1));
    assert(!is_whitespace_normalized(XCS("abc\t def"), 1));
}
/* Start-element handler that inspects the attributes named "attr",
   "ents" and "refs" and fails the test if the value of any of them is
   not whitespace-normalized under the non-CDATA rules.  'atts' is
   walked as a NULL-terminated list of name/value pairs; attributes
   with other names are ignored. */
static void XMLCALL
check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
                                          const XML_Char *UNUSED_P(name),
                                          const XML_Char **atts)
{
    const XML_Char **pair;

    for (pair = atts; pair[0] != NULL; pair += 2) {
        const XML_Char *attrname = pair[0];
        const XML_Char *value = pair[1];

        /* Only the three attributes declared by the test are checked */
        if (xcstrcmp(XCS("attr"), attrname) != 0
            && xcstrcmp(XCS("ents"), attrname) != 0
            && xcstrcmp(XCS("refs"), attrname) != 0)
            continue;

        if (!is_whitespace_normalized(value, 0)) {
            char buffer[256];

            sprintf(buffer, "attribute value not normalized: %"
                    XML_FMT_STR "='%" XML_FMT_STR "'",
                    attrname, value);
            fail(buffer);
        }
    }
}
/* Parses a document whose NMTOKENS, ENTITIES and IDREFS attribute
   values contain blanks, tabs and line breaks, with a start-element
   handler installed that fails the test if any of those values reaches
   it without being whitespace-normalized. */
START_TEST(test_attr_whitespace_normalization)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        " <!ATTLIST doc\n"
        " attr NMTOKENS #REQUIRED\n"
        " ents ENTITIES #REQUIRED\n"
        " refs IDREFS #REQUIRED>\n"
        "]>\n"
        "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
        " ents=' ent-1 \t\r\n"
        " ent-2 ' >\n"
        " <e id='id-1'/>\n"
        " <e id='id-2'/>\n"
        "</doc>";
    const int text_len = (int)strlen(text);

    XML_SetStartElementHandler(parser,
                               check_attr_contains_normalized_whitespace);
    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/*
* XML declaration tests.
*/
/* An XML declaration that is preceded by a newline must be rejected
   with XML_ERROR_MISPLACED_XML_PI. */
START_TEST(test_xmldecl_misplaced)
{
    const char *text =
        "\n"
        "<?xml version='1.0'?>\n"
        "<a/>";

    expect_failure(text,
                   XML_ERROR_MISPLACED_XML_PI,
                   "failed to report misplaced XML declaration");
}
END_TEST
/* An XML declaration with stray non-ASCII bytes after the version
   attribute must be rejected with XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_invalid)
{
    const char *text = "<?xml version='1.0' \xc3\xa7?>\n<doc/>";

    expect_failure(text,
                   XML_ERROR_XML_DECL,
                   "Failed to report invalid XML declaration");
}
END_TEST
/* An XML declaration whose attribute name is missing ("='1.0'") must
   be rejected with XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_missing_attr)
{
    const char *text = "<?xml ='1.0'?>\n<doc/>\n";

    expect_failure(text,
                   XML_ERROR_XML_DECL,
                   "Failed to report missing XML declaration attribute");
}
END_TEST
/* An XML declaration whose 'standalone' attribute has no value must be
   rejected with XML_ERROR_XML_DECL. */
START_TEST(test_xmldecl_missing_value)
{
    const char *text =
        "<?xml version='1.0' encoding='us-ascii' standalone?>\n"
        "<doc/>";

    expect_failure(text,
                   XML_ERROR_XML_DECL,
                   "Failed to report missing attribute value");
}
END_TEST
/* Regression test for SF bug #584832. */
/* Unknown-encoding handler that recognises only the encoding name
   "unsupported-encoding".  For that name it fills 'info' with an
   identity map (byte value i maps to i) and no data, convert or
   release callbacks, and returns XML_STATUS_OK; any other name yields
   XML_STATUS_ERROR with 'info' left untouched. */
static int XMLCALL
UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
{
    int i;

    if (xcstrcmp(encoding, XCS("unsupported-encoding")) != 0)
        return XML_STATUS_ERROR;

    for (i = 0; i < 256; ++i)
        info->map[i] = i;
    info->data = NULL;
    info->convert = NULL;
    info->release = NULL;
    return XML_STATUS_OK;
}
/* Parses a document that declares an encoding serviced by
   UnknownEncodingHandler and references an internal entity from an
   attribute value; the parse must succeed. */
START_TEST(test_unknown_encoding_internal_entity)
{
    const char *text =
        "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
        "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
        "<test a='&foo;'/>";
    const int text_len = (int)strlen(text);

    XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/* Test unrecognised encoding handler */
static void dummy_release(void *UNUSED_P(data))
{
}
static int XMLCALL
UnrecognisedEncodingHandler(void *UNUSED_P(data),
const XML_Char *UNUSED_P(encoding),
XML_Encoding *info)
{
info->data = NULL;
info->convert = NULL;
info->release = dummy_release;
return XML_STATUS_ERROR;
}
/* With an unknown-encoding handler installed that refuses the declared
   encoding, parsing the document must fail. */
START_TEST(test_unrecognised_encoding_internal_entity)
{
    const char *text =
        "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
        "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
        "<test a='&foo;'/>";
    const int text_len = (int)strlen(text);

    XML_SetUnknownEncodingHandler(parser,
                                  UnrecognisedEncodingHandler,
                                  NULL);
    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        != XML_STATUS_ERROR) {
        fail("Unrecognised encoding not rejected");
    }
}
END_TEST
/* Regression test for SF bug #620106. */
/* External entity reference handler driven by an ExtTest stored as the
   parser's user data.  It creates an external entity parser for
   'context', applies test_data->encoding to it when one is given, and
   parses test_data->parse_text with it.  Returns XML_STATUS_OK after
   freeing the sub-parser on success; a parse error is reported through
   xml_failure() and answered with XML_STATUS_ERROR. */
static int XMLCALL
external_entity_loader(XML_Parser parser,
                       const XML_Char *context,
                       const XML_Char *UNUSED_P(base),
                       const XML_Char *UNUSED_P(systemId),
                       const XML_Char *UNUSED_P(publicId))
{
    ExtTest *test_data = (ExtTest *)XML_GetUserData(parser);
    XML_Parser extparser =
        XML_ExternalEntityParserCreate(parser, context, NULL);

    if (extparser == NULL) {
        fail("Could not create external entity parser.");
    }
    /* An explicit encoding, when supplied, must be accepted */
    if (test_data->encoding != NULL
        && !XML_SetEncoding(extparser, test_data->encoding)) {
        fail("XML_SetEncoding() ignored for external entity");
    }
    if (_XML_Parse_SINGLE_BYTES(extparser,
                                test_data->parse_text,
                                (int)strlen(test_data->parse_text),
                                XML_TRUE) == XML_STATUS_ERROR) {
        xml_failure(extparser);
        return XML_STATUS_ERROR;
    }
    XML_ParserFree(extparser);
    return XML_STATUS_OK;
}
/* Checks that an encoding set with XML_SetEncoding() on an external
   entity parser takes precedence over the encoding named in the
   entity's own text declaration. */
START_TEST(test_ext_entity_set_encoding)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    /* The entity text declares iso-8859-3, but the bytes are really
       UTF-8, which is what external_entity_loader tells Expat through
       XML_SetEncoding().
    */
    ExtTest test_data = {
        "<?xml encoding='iso-8859-3'?>\xC3\xA9",
        XCS("utf-8"),
        NULL
    };
    /* The two bytes decode to the single character U+00E9 */
#ifdef XML_UNICODE
    const XML_Char *expected = XCS("\x00e9");
#else
    const XML_Char *expected = XCS("\xc3\xa9");
#endif

    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(text, &test_data, expected);
}
END_TEST
/* Test external entities with no handler: with only a default handler
   installed, referencing an external entity must yield no character
   data. */
START_TEST(test_ext_entity_no_handler)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";

    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_character_check(doc, XCS(""));
}
END_TEST
/* Test UTF-8 BOM is accepted at the start of an external entity whose
   encoding has been set explicitly to utf-8. */
START_TEST(test_ext_entity_set_bom)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    /* Entity text: BOM, text declaration, then U+00E9 encoded as UTF-8 */
    ExtTest test_data = {
        "\xEF\xBB\xBF" /* BOM */
        "<?xml encoding='iso-8859-3'?>"
        "\xC3\xA9",
        XCS("utf-8"),
        NULL
    };
#ifdef XML_UNICODE
    const XML_Char *expected = XCS("\x00e9");
#else
    const XML_Char *expected = XCS("\xc3\xa9");
#endif

    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(text, &test_data, expected);
}
END_TEST
/* Test that bad encodings are faulted */
/* Describes one expected failure of an external entity parse; consumed
   by external_entity_faulter() via the parser's user data. */
typedef struct ext_faults
{
/* Text handed to the external entity parser */
const char *parse_text;
/* Failure message used if parsing parse_text does not fail */
const char *fail_text;
/* Encoding to set on the external entity parser, or NULL for none */
const XML_Char *encoding;
/* Error code the external entity parser is expected to report */
enum XML_Error error;
} ExtFaults;
/* External entity reference handler driven by an ExtFaults stored as
   the parser's user data.  It parses fault->parse_text with a fresh
   external entity parser (after setting fault->encoding, if any) and
   requires that parse to fail with error code fault->error.  The
   sub-parser is freed and XML_STATUS_ERROR is always returned. */
static int XMLCALL
external_entity_faulter(XML_Parser parser,
                        const XML_Char *context,
                        const XML_Char *UNUSED_P(base),
                        const XML_Char *UNUSED_P(systemId),
                        const XML_Char *UNUSED_P(publicId))
{
    ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser);
    XML_Parser ext_parser =
        XML_ExternalEntityParserCreate(parser, context, NULL);

    if (ext_parser == NULL) {
        fail("Could not create external entity parser");
    }
    if (fault->encoding != NULL
        && !XML_SetEncoding(ext_parser, fault->encoding)) {
        fail("XML_SetEncoding failed");
    }

    /* The parse is supposed to fail... */
    if (_XML_Parse_SINGLE_BYTES(ext_parser,
                                fault->parse_text,
                                (int)strlen(fault->parse_text),
                                XML_TRUE) != XML_STATUS_ERROR) {
        fail(fault->fail_text);
    }
    /* ...and to fail for the anticipated reason */
    if (XML_GetErrorCode(ext_parser) != fault->error) {
        xml_failure(ext_parser);
    }

    XML_ParserFree(ext_parser);
    return XML_STATUS_ERROR;
}
/* The external entity parser is given the encoding name "unknown" and
   is expected to fail with XML_ERROR_UNKNOWN_ENCODING, which the outer
   parse must surface as XML_ERROR_EXTERNAL_ENTITY_HANDLING. */
START_TEST(test_ext_entity_bad_encoding)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    ExtFaults fault = {
        "<?xml encoding='iso-8859-3'?>u",
        "Unsupported encoding not faulted",
        XCS("unknown"),
        XML_ERROR_UNKNOWN_ENCODING
    };

    XML_SetUserData(parser, &fault);
    XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
    expect_failure(text,
                   XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                   "Bad encoding should not have been accepted");
}
END_TEST
/* Try handing an invalid encoding to an external entity parser, this
   time one created for the external DTD subset (parameter entity
   parsing is switched on). */
START_TEST(test_ext_entity_bad_encoding_2)
{
    const char *text =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtFaults fault = {
        "<!ELEMENT doc (#PCDATA)*>",
        "Unknown encoding not faulted",
        XCS("unknown-encoding"),
        XML_ERROR_UNKNOWN_ENCODING
    };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetUserData(parser, &fault);
    XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
    expect_failure(text,
                   XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                   "Bad encoding not faulted in external entity handler");
}
END_TEST
/* Test that no error is reported for unknown entities if we don't
   read an external subset.  This was fixed in Expat 1.95.5.
*/
START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
    const char *text =
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    const int text_len = (int)strlen(text);

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/* Test that an error is reported for unknown entities if we don't
   have an external subset (the document has no DTD at all).
*/
START_TEST(test_wfc_undeclared_entity_no_external_subset) {
    const char *text = "<doc>&entity;</doc>";

    expect_failure(text,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity w/out a DTD.");
}
END_TEST
/* Test that an error is reported for unknown entities if we don't
   read an external subset, but the document has been declared
   standalone.
*/
START_TEST(test_wfc_undeclared_entity_standalone) {
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";

    expect_failure(doc,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity (standalone).");
}
END_TEST
/* Test that an error is reported for unknown entities if we have read
   an external subset, and standalone is true.  The external subset
   served by external_entity_loader does not declare the entity.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
    const char *text =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = { "<!ELEMENT doc (#PCDATA)*>", NULL, NULL };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetUserData(parser, &test_data);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    expect_failure(text,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity (external DTD).");
}
END_TEST
/* Test that external entity handling is not done if the parsing flag
 * is set to UNLESS_STANDALONE and the document is standalone: the
 * entity that the external subset would declare must stay undefined.
 */
START_TEST(test_entity_with_external_subset_unless_standalone) {
    const char *text =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = {
        "<!ENTITY entity 'bar'>",
        NULL,
        NULL
    };

    XML_SetParamEntityParsing(parser,
                              XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    XML_SetUserData(parser, &test_data);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    expect_failure(text,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity");
}
END_TEST
/* Test that no error is reported for unknown entities if we have read
   an external subset, and standalone is false.  The undeclared entity
   must simply contribute no character data.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset) {
    const char *text =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = { "<!ELEMENT doc (#PCDATA)*>", NULL, NULL };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(text, &test_data, XCS(""));
}
END_TEST
/* Test that an error is reported if our NotStandalone handler fails */
/* NotStandalone handler that always answers XML_STATUS_ERROR. */
static int XMLCALL
reject_not_standalone_handler(void *UNUSED_P(userData))
{
    return XML_STATUS_ERROR;
}
/* With a NotStandalone handler that returns XML_STATUS_ERROR, parsing
   a document with an external DTD subset must fail with
   XML_ERROR_NOT_STANDALONE -- both with and without an external entity
   handler installed. */
START_TEST(test_not_standalone_handler_reject)
{
    const char *text =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = { "<!ELEMENT doc (#PCDATA)*>", NULL, NULL };

    /* First pass: external entity handling enabled */
    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetUserData(parser, &test_data);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler);
    expect_failure(text,
                   XML_ERROR_NOT_STANDALONE,
                   "NotStandalone handler failed to reject");

    /* Second pass: same document, but without external entity handling */
    XML_ParserReset(parser, NULL);
    XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler);
    expect_failure(text,
                   XML_ERROR_NOT_STANDALONE,
                   "NotStandalone handler failed to reject");
}
END_TEST
/* Test that no error is reported if our NotStandalone handler succeeds */
/* NotStandalone handler that always answers XML_STATUS_OK. */
static int XMLCALL
accept_not_standalone_handler(void *UNUSED_P(userData))
{
    return XML_STATUS_OK;
}
/* With a NotStandalone handler that returns XML_STATUS_OK, parsing a
   document with an external DTD subset must succeed and produce no
   character data -- both with and without an external entity handler
   installed. */
START_TEST(test_not_standalone_handler_accept)
{
    const char *text =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = { "<!ELEMENT doc (#PCDATA)*>", NULL, NULL };

    /* First pass: external entity handler installed */
    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler);
    run_ext_character_check(text, &test_data, XCS(""));

    /* Repeat without the external entity handler */
    XML_ParserReset(parser, NULL);
    XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler);
    run_character_check(text, XCS(""));
}
END_TEST
/* An entity whose replacement text refers back to the entity itself
   (via the character reference &#38; for '&') must be rejected with
   XML_ERROR_RECURSIVE_ENTITY_REF. */
START_TEST(test_wfc_no_recursive_entity_refs)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY entity '&#38;entity;'>\n"
        "]>\n"
        "<doc>&entity;</doc>";

    expect_failure(doc,
                   XML_ERROR_RECURSIVE_ENTITY_REF,
                   "Parser did not report recursive entity reference.");
}
END_TEST
/* Test incomplete external entities are faulted.  Each row of the
   table supplies a truncated entity text and the error the external
   entity parser must report for it; the outer parse must then fail
   with XML_ERROR_EXTERNAL_ENTITY_HANDLING. */
START_TEST(test_ext_entity_invalid_parse)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    const ExtFaults faults[] = {
        {
            "<",
            "Incomplete element declaration not faulted",
            NULL,
            XML_ERROR_UNCLOSED_TOKEN
        },
        {
            "<\xe2\x82", /* First two bytes of a three-byte char */
            "Incomplete character not faulted",
            NULL,
            XML_ERROR_PARTIAL_CHAR
        },
        {
            "<tag>\xe2\x82",
            "Incomplete character in CDATA not faulted",
            NULL,
            XML_ERROR_PARTIAL_CHAR
        },
        /* Sentinel: a NULL parse_text ends the table */
        { NULL, NULL, NULL, XML_ERROR_NONE }
    };
    const ExtFaults *fault = faults;

    while (fault->parse_text != NULL) {
        XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
        XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
        XML_SetUserData(parser, (void *)fault);
        expect_failure(text,
                       XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                       "Parser did not report external entity error");
        /* Start the next row from a clean parser */
        XML_ParserReset(parser, NULL);
        fault++;
    }
}
END_TEST
/* Regression test for SF bug #483514.
   Every kind of declaration in the internal subset has a dedicated
   (dummy) handler installed, while the default handler accumulates
   whatever still reaches it; only the newlines between the
   declarations and the root element are expected to get there. */
START_TEST(test_dtd_default_handling)
{
    const char *text =
        "<!DOCTYPE doc [\n"
        "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
        "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
        "<!ELEMENT doc EMPTY>\n"
        "<!ATTLIST doc a CDATA #IMPLIED>\n"
        "<?pi in dtd?>\n"
        "<!--comment in dtd-->\n"
        "]><doc/>";

    /* Catch-all whose output is checked below */
    XML_SetDefaultHandler(parser, accumulate_characters);

    /* Doctype and declaration handlers */
    XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler);
    XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler);
    XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler);
    XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler);
    XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
    XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler);

    /* Processing instruction, comment and CDATA section handlers */
    XML_SetProcessingInstructionHandler(parser, dummy_pi_handler);
    XML_SetCommentHandler(parser, dummy_comment_handler);
    XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler);
    XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler);

    run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
}
END_TEST
/* Test handling of attribute declarations */
/* One attribute-declaration test case: a document fragment plus the
   values verify_attlist_decl_handler() must be called with. */
typedef struct AttTest {
/* Remainder of the document, parsed after the common prolog */
const char *definition;
/* Expected element name */
const XML_Char *element_name;
/* Expected attribute name */
const XML_Char *attr_name;
/* Expected attribute type string */
const XML_Char *attr_type;
/* Expected default value, or NULL if none is expected */
const XML_Char *default_value;
/* Expected value of the handler's is_required argument */
int is_required;
} AttTest;
/* Attlist declaration handler that compares every argument it receives
   with the AttTest passed as user data and fails the test on the first
   mismatch.  A NULL default value only matches a NULL expectation. */
static void XMLCALL
verify_attlist_decl_handler(void *userData,
                            const XML_Char *element_name,
                            const XML_Char *attr_name,
                            const XML_Char *attr_type,
                            const XML_Char *default_value,
                            int is_required)
{
    AttTest *at = (AttTest *)userData;
    int default_mismatch;

    if (xcstrcmp(element_name, at->element_name) != 0) {
        fail("Unexpected element name in attribute declaration");
    }
    if (xcstrcmp(attr_name, at->attr_name) != 0) {
        fail("Unexpected attribute name in attribute declaration");
    }
    if (xcstrcmp(attr_type, at->attr_type) != 0) {
        fail("Unexpected attribute type in attribute declaration");
    }

    /* The strings are only compared when both sides are present */
    if (default_value == NULL)
        default_mismatch = (at->default_value != NULL);
    else
        default_mismatch =
            (at->default_value == NULL
             || xcstrcmp(default_value, at->default_value) != 0);
    if (default_mismatch) {
        fail("Unexpected default value in attribute declaration");
    }

    if (is_required != at->is_required) {
        fail("Requirement mismatch in attribute declaration");
    }
}
/* Runs a table of attribute declarations through the parser.  Each
   case is parsed as the common prolog followed by the case's own
   definition, with verify_attlist_decl_handler checking what the
   attlist declaration handler is told; the parser is reset between
   cases. */
START_TEST(test_dtd_attr_handling)
{
    const char *prolog =
        "<!DOCTYPE doc [\n"
        "<!ELEMENT doc EMPTY>\n";
    AttTest attr_data[] = {
        {
            "<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
            "]>"
            "<doc a='two'/>",
            XCS("doc"),
            XCS("a"),
            XCS("(one|two|three)"), /* Extraneous spaces will be removed */
            NULL,
            XML_TRUE
        },
        {
            "<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
            "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
            "]>"
            "<doc/>",
            XCS("doc"),
            XCS("a"),
            XCS("NOTATION(foo)"),
            NULL,
            XML_FALSE
        },
        {
            "<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
            "]>"
            "<doc/>",
            XCS("doc"),
            XCS("a"),
            XCS("NOTATION(foo)"),
            XCS("bar"),
            XML_FALSE
        },
        {
            "<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
            "]>"
            "<doc/>",
            XCS("doc"),
            XCS("a"),
            XCS("CDATA"),
#ifdef XML_UNICODE
            XCS("\x06f2"),
#else
            XCS("\xdb\xb2"),
#endif
            XML_FALSE
        },
        /* Sentinel: a NULL definition ends the table */
        { NULL, NULL, NULL, NULL, NULL, XML_FALSE }
    };
    const int prolog_len = (int)strlen(prolog);
    AttTest *tc = attr_data;

    while (tc->definition != NULL) {
        XML_SetAttlistDeclHandler(parser, verify_attlist_decl_handler);
        XML_SetUserData(parser, tc);

        /* Common prolog first (not final), then the case itself */
        if (_XML_Parse_SINGLE_BYTES(parser, prolog, prolog_len,
                                    XML_FALSE) == XML_STATUS_ERROR) {
            xml_failure(parser);
        }
        if (_XML_Parse_SINGLE_BYTES(parser,
                                    tc->definition,
                                    (int)strlen(tc->definition),
                                    XML_TRUE) == XML_STATUS_ERROR) {
            xml_failure(parser);
        }

        XML_ParserReset(parser, NULL);
        tc++;
    }
}
END_TEST
/* See related SF bug #673791.
   When namespace processing is enabled, setting the namespace URI for
   a prefix to the empty string is not allowed; this test ensures that
   it *is* allowed when namespace processing is not enabled.
   (See Namespaces in XML, section 2.)
*/
START_TEST(test_empty_ns_without_namespaces)
{
    const char *text =
        "<doc xmlns:prefix='http://example.org/'>\n"
        " <e xmlns:prefix=''/>\n"
        "</doc>";
    const int text_len = (int)strlen(text);

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/* Regression test for SF bug #824420.
   Checks that an xmlns:prefix attribute supplied through an attribute
   default in the DTD isn't misinterpreted; the document must parse
   without error.
*/
START_TEST(test_ns_in_attribute_default_without_namespaces)
{
    const char *text =
        "<!DOCTYPE e:element [\n"
        " <!ATTLIST e:element\n"
        " xmlns:e CDATA 'http://example.org/'>\n"
        " ]>\n"
        "<e:element/>";
    const int text_len = (int)strlen(text);

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
}
END_TEST
/* Shared input for the stop/suspend tests below: an iso-8859-1
   document consisting of a single <s> element whose character data is
   1200 digits long (20 rows of 60). */
static const char *long_character_data_text =
"<?xml version='1.0' encoding='iso-8859-1'?><s>"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"</s>";
static XML_Bool resumable = XML_FALSE;
static void
clearing_aborting_character_handler(void *UNUSED_P(userData),
const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
}
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)
{
    /* The sample data must be big enough that there are two calls to
       the character data handler from within the inner "for" loop of
       the XML_TOK_DATA_CHARS case in doContent(), and the character
       handler must stop the parser and clear the character data
       handler.  Here the stop is an abort, so the parse has to end in
       XML_ERROR_ABORTED.
    */
    const char *text = long_character_data_text;
    const int text_len = (int)strlen(text);

    XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
    resumable = XML_FALSE;

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        != XML_STATUS_ERROR) {
        xml_failure(parser);
    }
    if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED) {
        xml_failure(parser);
    }
}
END_TEST
/* Regression test for SF bug #1515266: missing check of stopped
   parser in doContent() 'for' loop.  Suspending variant. */
START_TEST(test_suspend_parser_between_char_data_calls)
{
    /* The sample data must be big enough that there are two calls to
       the character data handler from within the inner "for" loop of
       the XML_TOK_DATA_CHARS case in doContent(), and the character
       handler must stop the parser and clear the character data
       handler.  Here the stop is a resumable suspend, so the parse has
       to return XML_STATUS_SUSPENDED without an error code.
    */
    const char *text = long_character_data_text;
    const int text_len = (int)strlen(text);

    XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
    resumable = XML_TRUE;

    if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_TRUE)
        != XML_STATUS_SUSPENDED) {
        xml_failure(parser);
    }
    if (XML_GetErrorCode(parser) != XML_ERROR_NONE) {
        xml_failure(parser);
    }

    /* Try parsing directly: a suspended parser must refuse more input */
    if (XML_Parse(parser, text, text_len, XML_TRUE) != XML_STATUS_ERROR) {
        fail("Attempt to continue parse while suspended not faulted");
    }
    if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED) {
        fail("Suspended parse not faulted with correct error");
    }
}
END_TEST
static XML_Bool abortable = XML_FALSE;
static void
parser_stop_character_handler(void *UNUSED_P(userData),
const XML_Char *UNUSED_P(s),
int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
if (!resumable) {
/* Check that aborting an aborted parser is faulted */
if (XML_StopParser(parser, XML_FALSE) != XML_STATUS_ERROR)
fail("Aborting aborted parser not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_FINISHED)
xml_failure(parser);
} else if (abortable) {
/* Check that aborting a suspended parser works */
if (XML_StopParser(parser, XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
} else {
/* Check that suspending a suspended parser works */
if (XML_StopParser(parser, XML_TRUE) != XML_STATUS_ERROR)
fail("Suspending suspended parser not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED)
xml_failure(parser);
}
}
/* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)
{
const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_FALSE;
abortable = XML_FALSE;
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
XML_TRUE) != XML_STATUS_ERROR)
fail("Failed to double-stop parser");
XML_ParserReset(parser, NULL);
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_TRUE;
abortable = XML_FALSE;
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
XML_TRUE) != XML_STATUS_SUSPENDED)
fail("Failed to double-suspend parser");
XML_ParserReset(parser, NULL);
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_TRUE;
abortable = XML_TRUE;
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
XML_TRUE) != XML_STATUS_ERROR)
fail("Failed to suspend-abort parser");
}
END_TEST
START_TEST(test_good_cdata_ascii)
{
const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
/* Add start and end handlers for coverage */
XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler);
XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
/* Try again, this time with a default handler */
XML_ParserReset(parser, NULL);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
XML_SetDefaultHandler(parser, dummy_default_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
START_TEST(test_good_cdata_utf16)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[hello]]></a>
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
const XML_Char *expected = XCS("hello");
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
START_TEST(test_good_cdata_utf16_le)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[hello]]></a>
*/
const char text[] =
"<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
const XML_Char *expected = XCS("hello");
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test UTF16 conversion of a long cdata string.  The whole document
   is handed over in one piece through XML_GetBuffer() and
   XML_ParseBuffer() rather than byte by byte. */

/* 16 characters: handy macro to reduce visual clutter */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
START_TEST(test_long_cdata_utf16)
{
    /* Test data is:
     * <?xml version='1.0' encoding='utf-16'?>
     * <a><![CDATA[
     * ABCDEFGHIJKLMNOP  (65 times over, i.e. 1040 characters)
     * ]]></a>
     */
    const char text[] =
        "\0<\0?\0x\0m\0l\0 "
        "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
        "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
        /* 64 characters per line */
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
        A_TO_P_IN_UTF16
        "\0]\0]\0>\0<\0/\0a\0>";
    /* The terminating semicolon belongs after the last XCS(...), not
       inside its argument list. */
    const XML_Char *expected =
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
        XCS("ABCDEFGHIJKLMNOP");
    CharData storage;
    void *buffer;

    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetCharacterDataHandler(parser, accumulate_characters);
    /* sizeof(text) - 1 drops the NUL the literal appends; the casts
       make the size_t to int conversion explicit, matching the other
       sizeof-based length arguments in this file. */
    buffer = XML_GetBuffer(parser, (int)sizeof(text) - 1);
    if (buffer == NULL)
        fail("Could not allocate parse buffer");
    memcpy(buffer, text, sizeof(text) - 1);
    if (XML_ParseBuffer(parser,
                        (int)sizeof(text) - 1,
                        XML_TRUE) == XML_STATUS_ERROR)
        xml_failure(parser);
    CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[{MINIM}{CROTCHET}]]></a>
*
* where {MINIM} is U+1d15e (a minim or half-note)
* UTF-16: 0xd834 0xdd5e
* UTF-8: 0xf0 0x9d 0x85 0x9e
* and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
* UTF-16: 0xd834 0xdd5f
* UTF-8: 0xf0 0x9d 0x85 0x9f
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
"\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
"\0]\0]\0>\0<\0/\0a\0>";
#ifdef XML_UNICODE
const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
#else
const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
#endif
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[{BADLINB}]]></a>
*
* where {BADLINB} is U+10000 (the first Linear B character)
* with the UTF-16 surrogate pair in the wrong order, i.e.
* 0xdc00 0xd800
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
"\xdc\x00\xd8\x00"
"\0]\0]\0>\0<\0/\0a\0>";
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)sizeof(text) - 1,
XML_TRUE) != XML_STATUS_ERROR)
fail("Reversed UTF-16 surrogate pair not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
}
END_TEST
/* Test that truncated or malformed CDATA section markup fails to
   parse, and that each input yields the error code listed for it */
START_TEST(test_bad_cdata)
{
    struct CaseData {
        const char *text;
        enum XML_Error expectedError;
    };

    struct CaseData cases[] = {
        {"<a><", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
        {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
        {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
        {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},

        {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
        {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
        {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
        {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
        {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
        {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
        {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},

        {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
        {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
        {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
    };
    const size_t caseCount = sizeof(cases) / sizeof(cases[0]);
    size_t idx;

    for (idx = 0; idx < caseCount; idx++) {
        const struct CaseData *const current = &cases[idx];
        const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
            parser, current->text, (int)strlen(current->text), XML_TRUE);
        const enum XML_Error actualError = XML_GetErrorCode(parser);

        /* Every input in the table is expected to fail */
        assert(actualStatus == XML_STATUS_ERROR);

        if (actualError != current->expectedError) {
            char message[100];
            /* Cases are reported 1-based */
            sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
                    current->expectedError, actualError,
                    (unsigned int)idx + 1, current->text);
            fail(message);
        }

        /* Fresh parser state for the next input */
        XML_ParserReset(parser, NULL);
    }
}
END_TEST
/* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)
{
struct CaseData {
size_t text_bytes;
const char *text;
enum XML_Error expected_error;
};
const char prolog[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>";
struct CaseData cases[] = {
{1, "\0", XML_ERROR_UNCLOSED_TOKEN},
{2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
{3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
{4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
{5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
{6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
{7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
{8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
{9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
{10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
{11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
{12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
{13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
{14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
{15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
{16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
{17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
{18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z",
XML_ERROR_UNCLOSED_CDATA_SECTION},
/* Now add a four-byte UTF-16 character */
{21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34",
XML_ERROR_PARTIAL_CHAR},
{23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
XML_ERROR_PARTIAL_CHAR},
{24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
XML_ERROR_UNCLOSED_CDATA_SECTION}
};
size_t i;
for (i = 0; i < sizeof(cases)/sizeof(struct CaseData); i++) {
enum XML_Status actual_status;
enum XML_Error actual_error;
if (_XML_Parse_SINGLE_BYTES(parser, prolog, (int)sizeof(prolog)-1,
XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
actual_status = _XML_Parse_SINGLE_BYTES(parser,
cases[i].text,
(int)cases[i].text_bytes,
XML_TRUE);
assert(actual_status == XML_STATUS_ERROR);
actual_error = XML_GetErrorCode(parser);
if (actual_error != cases[i].expected_error) {
char message[1024];
sprintf(message,
"Expected error %d (%" XML_FMT_STR
"), got %d (%" XML_FMT_STR ") for case %lu\n",
cases[i].expected_error,
XML_ErrorString(cases[i].expected_error),
actual_error,
XML_ErrorString(actual_error),
(long unsigned)(i+1));
fail(message);
}
XML_ParserReset(parser, NULL);
}
}
END_TEST
/* Document consisting of a single <s> element whose only content is
   one CDATA section of 1200 digit characters (20 lines of 60 below).
   It is the input for the tests that stop or suspend the parser from
   the character data handler while CDATA is being delivered. */
static const char *long_cdata_text =
"<s><![CDATA["
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"]]></s>";
/* Test that a parse of long_cdata_text fails with XML_ERROR_ABORTED
   when the character data handler stops the parser non-resumably */
START_TEST(test_stop_parser_between_cdata_calls)
{
    resumable = XML_FALSE;
    XML_SetCharacterDataHandler(parser,
                                clearing_aborting_character_handler);
    expect_failure(long_cdata_text, XML_ERROR_ABORTED,
                   "Parse not aborted in CDATA handler");
}
END_TEST
/* Test suspending the parser in cdata handler */
START_TEST(test_suspend_parser_between_cdata_calls)
{
const char *text = long_cdata_text;
enum XML_Status result;
XML_SetCharacterDataHandler(parser,
clearing_aborting_character_handler);
resumable = XML_TRUE;
result = _XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE);
if (result != XML_STATUS_SUSPENDED) {
if (result == XML_STATUS_ERROR)
xml_failure(parser);
fail("Parse not suspended in CDATA handler");
}
if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
xml_failure(parser);
}
END_TEST
/* Test XML_MemMalloc(), XML_MemRealloc() and XML_MemFree(): allocate
   a block, write to it, grow it, write again, then release it */
START_TEST(test_memory_allocation)
{
    char *block = (char *)XML_MemMalloc(parser, 256);
    char *grown;

    if (block != NULL) {
        /* Try writing to memory; some OSes try to cheat! */
        block[0] = 'T';
        block[1] = 'E';
        block[2] = 'S';
        block[3] = 'T';
        block[4] = '\0';
        if (strcmp(block, "TEST") == 0) {
            grown = (char *)XML_MemRealloc(parser, block, 512);
            if (grown != NULL) {
                /* Write again, just to be sure; only the first
                   character changes, so the rest of the string must
                   have survived the reallocation */
                block = grown;
                block[0] = 'V';
                if (strcmp(block, "VEST") != 0) {
                    fail("Reallocated memory not writable");
                }
            } else {
                fail("Reallocation failed");
            }
        } else {
            fail("Memory not writable");
        }
        XML_MemFree(parser, block);
    } else {
        fail("Allocation failed");
    }
}
END_TEST
/* Handler that appends a single "D" to the CharData passed as user
   data each time it is called; the text it is given is ignored. */
static void XMLCALL
record_default_handler(void *userData,
                       const XML_Char *UNUSED_P(s),
                       int UNUSED_P(len))
{
    CharData *const record = (CharData *)userData;

    CharData_AppendXMLChars(record, XCS("D"), 1);
}
/* Handler that appends a single "C" to the CharData passed as user
   data and then calls XML_DefaultCurrent() on the global parser; the
   text it is given is ignored. */
static void XMLCALL
record_cdata_handler(void *userData,
                     const XML_Char *UNUSED_P(s),
                     int UNUSED_P(len))
{
    CharData *const record = (CharData *)userData;

    CharData_AppendXMLChars(record, XCS("C"), 1);
    XML_DefaultCurrent(parser);
}
/* Handler that appends a single "c" to the CharData passed as user
   data; unlike record_cdata_handler() it does not call
   XML_DefaultCurrent().  The text it is given is ignored. */
static void XMLCALL
record_cdata_nodefault_handler(void *userData,
                               const XML_Char *UNUSED_P(s),
                               int UNUSED_P(len))
{
    CharData *const record = (CharData *)userData;

    CharData_AppendXMLChars(record, XCS("c"), 1);
}
/* Handler that appends one character to the CharData passed as user
   data: "E" when is_parameter_entity is non-zero, "e" otherwise.
   The entity name is ignored. */
static void XMLCALL
record_skip_handler(void *userData,
                    const XML_Char *UNUSED_P(entityName),
                    int is_parameter_entity)
{
    const XML_Char *marker = XCS("e");

    if (is_parameter_entity)
        marker = XCS("E");
    CharData_AppendXMLChars((CharData *)userData, marker, 1);
}
/* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)
{
const char *text = "<doc>hell]</doc>";
const char *entity_text =
"<!DOCTYPE doc [\n"
"<!ENTITY entity '&#37;'>\n"
"]>\n"
"<doc>&entity;</doc>";
CharData storage;
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD"));
/* Again, without the defaulting */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_nodefault_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, XCS("DcccccD"));
/* Now with an internal entity to complicate matters */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, (int)strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
/* The default handler suppresses the entity */
CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD"));
/* Again, with a skip handler */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
XML_SetSkippedEntityHandler(parser, record_skip_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, (int)strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
/* The default handler suppresses the entity */