blob: aa95fd377b82d1ce20b60eeb7a2d9dbd23f5f2e6 [file] [log] [blame]
/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
See the file COPYING for copying permission.
runtest.c : run the Expat test suite
*/
#ifdef HAVE_EXPAT_CONFIG_H
#include <expat_config.h>
#endif
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h> /* ptrdiff_t */
#include <ctype.h>
#ifndef __cplusplus
# include <stdbool.h>
#endif
#include <limits.h>
#include "expat.h"
#include "chardata.h"
#include "internal.h" /* for UNUSED_P only */
#include "minicheck.h"
#include "memcheck.h"
#include "siphash.h"
#include "ascii.h" /* for ASCII_xxx */
#ifdef XML_LARGE_SIZE
#define XML_FMT_INT_MOD "ll"
#else
#define XML_FMT_INT_MOD "l"
#endif
#if defined(NDEBUG)
# error \
The test suite relies on assert(...) at the moment. \
You have NDEBUG defined which removes that code so that failures in the \
test suite can go unnoticed. \
\
While we rely on assert(...), compiling the test suite with NDEBUG \
defined is not supported.
#endif
/* Parser instance used by the test cases in this file; it is created
   by basic_setup() and released again by basic_teardown(). */
static XML_Parser parser = NULL;

/* Fixture setup: create the shared parser (NULL is passed as the
   encoding argument) and report a test failure if none is returned. */
static void
basic_setup(void)
{
    parser = XML_ParserCreate(NULL);
    if (! parser)
        fail("Parser not created.");
}
/* Fixture teardown: free the shared parser when one exists and clear
   the global pointer afterwards. */
static void
basic_teardown(void)
{
    if (parser == NULL)
        return;
    XML_ParserFree(parser);
    parser = NULL;
}
/* Generate a failure using the parser state to create an error message;
   this should be used when the parser reports an error we weren't
   expecting.

   parser      parser whose error code, error string and current
               line/column are put into the message
   file, line  location of the caller; included in the message text and
               passed on to _fail_unless()

   The message is formatted with snprintf() so that a long error string
   or file path is truncated rather than written past the end of the
   local buffer.
*/
static void
_xml_failure(XML_Parser parser, const char *file, int line)
{
    char buffer[1024];
    enum XML_Error err = XML_GetErrorCode(parser);

    snprintf(buffer, sizeof(buffer),
             " %d: %s (line %" XML_FMT_INT_MOD "u, offset %"
             XML_FMT_INT_MOD "u)\n reported from %s, line %d\n",
             err,
             XML_ErrorString(err),
             XML_GetCurrentLineNumber(parser),
             XML_GetCurrentColumnNumber(parser),
             file, line);
    _fail_unless(0, file, line, buffer);
}
/* Feed `len` bytes of `s` to the parser one byte per XML_Parse() call.
   The isFinal flag is only forwarded (as 0 or 1) together with the last
   byte.  A zero-length input is passed straight through in one call.
   Returns the status of the first call that is not XML_STATUS_OK, or
   the status of the last call otherwise; if no call is made at all the
   result is XML_STATUS_ERROR. */
static enum XML_Status
_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
{
    enum XML_Status status = XML_STATUS_ERROR;
    int i;

    if (len == 0)
        return XML_Parse(parser, s, len, isFinal);

    for (i = 0; i < len; i++) {
        /* Copy the byte into a local to help out-of-bounds detection */
        const char byte = s[i];
        const int atLastByte = (i + 1 == len);
        const int finalNow = atLastByte && isFinal;

        status = XML_Parse(parser, &byte, sizeof(char), finalNow);
        if (status != XML_STATUS_OK)
            break;
    }
    return status;
}
#define xml_failure(parser) _xml_failure((parser), __FILE__, __LINE__)
static void
_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
const char *file, int lineno)
{
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
/* Hackish use of _fail_unless() macro, but let's us report
the right filename and line number. */
_fail_unless(0, file, lineno, errorMessage);
if (XML_GetErrorCode(parser) != errorCode)
_xml_failure(parser, file, lineno);
}
#define expect_failure(text, errorCode, errorMessage) \
_expect_failure((text), (errorCode), (errorMessage), \
__FILE__, __LINE__)
/* Dummy handlers for when we need to set a handler to tickle a bug,
   but it doesn't need to do anything.

   Most of the dummy handlers below OR one of the following bits into
   dummy_handler_flags when they are invoked, so a test can check
   afterwards which handlers were actually called.  Each flag occupies
   its own bit of the unsigned long.
*/
static unsigned long dummy_handler_flags = 0;
#define DUMMY_START_DOCTYPE_HANDLER_FLAG (1UL << 0)
#define DUMMY_END_DOCTYPE_HANDLER_FLAG (1UL << 1)
#define DUMMY_ENTITY_DECL_HANDLER_FLAG (1UL << 2)
#define DUMMY_NOTATION_DECL_HANDLER_FLAG (1UL << 3)
#define DUMMY_ELEMENT_DECL_HANDLER_FLAG (1UL << 4)
#define DUMMY_ATTLIST_DECL_HANDLER_FLAG (1UL << 5)
#define DUMMY_COMMENT_HANDLER_FLAG (1UL << 6)
#define DUMMY_PI_HANDLER_FLAG (1UL << 7)
#define DUMMY_START_ELEMENT_HANDLER_FLAG (1UL << 8)
#define DUMMY_START_CDATA_HANDLER_FLAG (1UL << 9)
#define DUMMY_END_CDATA_HANDLER_FLAG (1UL << 10)
#define DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG (1UL << 11)
#define DUMMY_START_NS_DECL_HANDLER_FLAG (1UL << 12)
#define DUMMY_END_NS_DECL_HANDLER_FLAG (1UL << 13)
#define DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG (1UL << 14)
#define DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG (1UL << 15)
#define DUMMY_SKIP_HANDLER_FLAG (1UL << 16)
#define DUMMY_DEFAULT_HANDLER_FLAG (1UL << 17)
/* Dummy XML declaration handler: ignores every argument and does
   nothing (no flag is recorded). */
static void XMLCALL
dummy_xdecl_handler(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(version),
                    const XML_Char *UNUSED_P(encoding),
                    int UNUSED_P(standalone))
{
    /* intentionally empty */
}
/* Dummy start-of-doctype handler: ignores its arguments and records
   the call by setting DUMMY_START_DOCTYPE_HANDLER_FLAG. */
static void XMLCALL
dummy_start_doctype_handler(void *UNUSED_P(userData),
                            const XML_Char *UNUSED_P(doctypeName),
                            const XML_Char *UNUSED_P(sysid),
                            const XML_Char *UNUSED_P(pubid),
                            int UNUSED_P(has_internal_subset))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_DOCTYPE_HANDLER_FLAG;
}
/* Dummy end-of-doctype handler: records the call by setting
   DUMMY_END_DOCTYPE_HANDLER_FLAG. */
static void XMLCALL
dummy_end_doctype_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_DOCTYPE_HANDLER_FLAG;
}
/* Dummy entity declaration handler: ignores its arguments and records
   the call by setting DUMMY_ENTITY_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_entity_decl_handler(void *UNUSED_P(userData),
                          const XML_Char *UNUSED_P(entityName),
                          int UNUSED_P(is_parameter_entity),
                          const XML_Char *UNUSED_P(value),
                          int UNUSED_P(value_length),
                          const XML_Char *UNUSED_P(base),
                          const XML_Char *UNUSED_P(systemId),
                          const XML_Char *UNUSED_P(publicId),
                          const XML_Char *UNUSED_P(notationName))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_ENTITY_DECL_HANDLER_FLAG;
}
/* Dummy notation declaration handler: ignores its arguments and
   records the call by setting DUMMY_NOTATION_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_notation_decl_handler(void *UNUSED_P(userData),
                            const XML_Char *UNUSED_P(notationName),
                            const XML_Char *UNUSED_P(base),
                            const XML_Char *UNUSED_P(systemId),
                            const XML_Char *UNUSED_P(publicId))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_NOTATION_DECL_HANDLER_FLAG;
}
/* Dummy element declaration handler: releases the content model it is
   handed and records the call by setting
   DUMMY_ELEMENT_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_element_decl_handler(void *UNUSED_P(userData),
                           const XML_Char *UNUSED_P(name),
                           XML_Content *model)
{
    /* Freeing the content model is the handler's job.  The global
     * parser is used for that because userData cannot carry the parser:
     * this handler is combined with other handlers that need userData
     * for something else.
     */
    XML_FreeContentModel(parser, model);
    dummy_handler_flags = dummy_handler_flags | DUMMY_ELEMENT_DECL_HANDLER_FLAG;
}
/* Dummy attribute-list declaration handler: ignores its arguments and
   records the call by setting DUMMY_ATTLIST_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_attlist_decl_handler(void *UNUSED_P(userData),
                           const XML_Char *UNUSED_P(elname),
                           const XML_Char *UNUSED_P(attname),
                           const XML_Char *UNUSED_P(att_type),
                           const XML_Char *UNUSED_P(dflt),
                           int UNUSED_P(isrequired))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_ATTLIST_DECL_HANDLER_FLAG;
}
/* Dummy comment handler: ignores the comment text and records the call
   by setting DUMMY_COMMENT_HANDLER_FLAG. */
static void XMLCALL
dummy_comment_handler(void *UNUSED_P(userData),
                      const XML_Char *UNUSED_P(data))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_COMMENT_HANDLER_FLAG;
}
/* Dummy processing-instruction handler: ignores target and data and
   records the call by setting DUMMY_PI_HANDLER_FLAG. */
static void XMLCALL
dummy_pi_handler(void *UNUSED_P(userData),
                 const XML_Char *UNUSED_P(target),
                 const XML_Char *UNUSED_P(data))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_PI_HANDLER_FLAG;
}
/* Dummy start-element handler: ignores name and attributes and records
   the call by setting DUMMY_START_ELEMENT_HANDLER_FLAG. */
static void XMLCALL
dummy_start_element(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(name),
                    const XML_Char **UNUSED_P(atts))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_ELEMENT_HANDLER_FLAG;
}
/* Dummy end-element handler: ignores its arguments and does nothing
   (no flag is recorded). */
static void XMLCALL
dummy_end_element(void *UNUSED_P(userData),
                  const XML_Char *UNUSED_P(name))
{
    /* intentionally empty */
}
/* Dummy start-of-CDATA-section handler: records the call by setting
   DUMMY_START_CDATA_HANDLER_FLAG. */
static void XMLCALL
dummy_start_cdata_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_CDATA_HANDLER_FLAG;
}
/* Dummy end-of-CDATA-section handler: records the call by setting
   DUMMY_END_CDATA_HANDLER_FLAG. */
static void XMLCALL
dummy_end_cdata_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_CDATA_HANDLER_FLAG;
}
/* Dummy character data handler: ignores the data it is given and does
   nothing (no flag is recorded). */
static void XMLCALL
dummy_cdata_handler(void *UNUSED_P(userData),
                    const XML_Char *UNUSED_P(s),
                    int UNUSED_P(len))
{
    /* intentionally empty */
}
/* Dummy start-of-namespace-declaration handler: ignores prefix and URI
   and records the call by setting DUMMY_START_NS_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_start_namespace_decl_handler(void *UNUSED_P(userData),
                                   const XML_Char *UNUSED_P(prefix),
                                   const XML_Char *UNUSED_P(uri))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_START_NS_DECL_HANDLER_FLAG;
}
/* Dummy end-of-namespace-declaration handler: ignores the prefix and
   records the call by setting DUMMY_END_NS_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_end_namespace_decl_handler(void *UNUSED_P(userData),
                                 const XML_Char *UNUSED_P(prefix))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_END_NS_DECL_HANDLER_FLAG;
}
/* The unparsed entity declaration handler is obsolete, but as long as
 * the code for it exists the tests should cover it.  This dummy
 * ignores its arguments and records the call by setting
 * DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG.
 */
static void XMLCALL
dummy_unparsed_entity_decl_handler(void *UNUSED_P(userData),
                                   const XML_Char *UNUSED_P(entityName),
                                   const XML_Char *UNUSED_P(base),
                                   const XML_Char *UNUSED_P(systemId),
                                   const XML_Char *UNUSED_P(publicId),
                                   const XML_Char *UNUSED_P(notationName))
{
    dummy_handler_flags =
        dummy_handler_flags | DUMMY_UNPARSED_ENTITY_DECL_HANDLER_FLAG;
}
/* Dummy default handler: ignores the data it is given and does nothing
   (DUMMY_DEFAULT_HANDLER_FLAG is not set here). */
static void XMLCALL
dummy_default_handler(void *UNUSED_P(userData),
                      const XML_Char *UNUSED_P(s),
                      int UNUSED_P(len))
{
    /* intentionally empty */
}
/* Dummy start-of-doctype-declaration handler: ignores its arguments
   and records the call by setting
   DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_start_doctype_decl_handler(void *UNUSED_P(userData),
                                 const XML_Char *UNUSED_P(doctypeName),
                                 const XML_Char *UNUSED_P(sysid),
                                 const XML_Char *UNUSED_P(pubid),
                                 int UNUSED_P(has_internal_subset))
{
    dummy_handler_flags =
        dummy_handler_flags | DUMMY_START_DOCTYPE_DECL_HANDLER_FLAG;
}
/* Dummy end-of-doctype-declaration handler: records the call by
   setting DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG. */
static void XMLCALL
dummy_end_doctype_decl_handler(void *UNUSED_P(userData))
{
    dummy_handler_flags =
        dummy_handler_flags | DUMMY_END_DOCTYPE_DECL_HANDLER_FLAG;
}
/* Dummy skipped-entity handler: ignores its arguments and records the
   call by setting DUMMY_SKIP_HANDLER_FLAG. */
static void XMLCALL
dummy_skip_handler(void *UNUSED_P(userData),
                   const XML_Char *UNUSED_P(entityName),
                   int UNUSED_P(is_parameter_entity))
{
    dummy_handler_flags = dummy_handler_flags | DUMMY_SKIP_HANDLER_FLAG;
}
/* Useful external entity handler */
/* One entry of the option table that external_entity_optioner() reads
 * from the parser's user data.  The handler walks the table until it
 * reaches an entry whose parse_text is NULL.
 */
typedef struct ExtOption {
/* System identifier this entry is compared against */
const char *system_id;
/* Text parsed as the external entity when system_id matches */
const char *parse_text;
} ExtOption;
/* External entity reference handler driven by a table of ExtOption
   entries taken from the parser's user data.
   The table is scanned up to the first entry whose parse_text is NULL.
   For the first entry whose system_id equals `systemId`, an external
   entity parser is created, the entry's parse_text is parsed byte by
   byte as a complete document, the sub-parser is freed and the parse
   status is returned.  XML_STATUS_ERROR is returned if the sub-parser
   cannot be created; if no entry matches, a test failure is raised. */
static int XMLCALL
external_entity_optioner(XML_Parser parser,
                         const XML_Char *context,
                         const XML_Char *UNUSED_P(base),
                         const XML_Char *systemId,
                         const XML_Char *UNUSED_P(publicId))
{
    ExtOption *entry;

    for (entry = (ExtOption *)XML_GetUserData(parser);
         entry->parse_text != NULL;
         entry++) {
        XML_Parser child;
        enum XML_Status result;

        if (strcmp(systemId, entry->system_id) != 0)
            continue;

        child = XML_ExternalEntityParserCreate(parser, context, NULL);
        if (child == NULL)
            return XML_STATUS_ERROR;
        result = _XML_Parse_SINGLE_BYTES(child, entry->parse_text,
                                         strlen(entry->parse_text),
                                         XML_TRUE);
        XML_ParserFree(child);
        return result;
    }
    fail("No suitable option found");
    return XML_STATUS_ERROR;
}
/*
 * Parameter entity evaluation support.
 *
 * A test stores the entity name and value it is looking for in
 * entity_name_to_match and entity_value_to_match;
 * param_entity_match_handler() then sets entity_match_flag to
 * ENTITY_MATCH_SUCCESS or ENTITY_MATCH_FAIL when it sees a parameter
 * entity with that name.  The flag starts out as
 * ENTITY_MATCH_NOT_FOUND.
 */
#define ENTITY_MATCH_FAIL (-1)
#define ENTITY_MATCH_NOT_FOUND (0)
#define ENTITY_MATCH_SUCCESS (1)
static const XML_Char *entity_name_to_match = NULL;
static const XML_Char *entity_value_to_match = NULL;
static int entity_match_flag = ENTITY_MATCH_NOT_FOUND;
/* Entity declaration handler that compares one parameter entity with
   an expected value.
   Declarations are ignored unless they are parameter entities and both
   entity_name_to_match and entity_value_to_match are set.  When the
   entity name equals entity_name_to_match, entity_match_flag becomes
   ENTITY_MATCH_SUCCESS if the declared value has the same length and
   content as entity_value_to_match and ENTITY_MATCH_FAIL otherwise.
   For any other name the flag is left untouched. */
static void XMLCALL
param_entity_match_handler(void *UNUSED_P(userData),
                           const XML_Char *entityName,
                           int is_parameter_entity,
                           const XML_Char *value,
                           int value_length,
                           const XML_Char *UNUSED_P(base),
                           const XML_Char *UNUSED_P(systemId),
                           const XML_Char *UNUSED_P(publicId),
                           const XML_Char *UNUSED_P(notationName))
{
    int same_value;

    if (!is_parameter_entity)
        return;
    if (entity_name_to_match == NULL || entity_value_to_match == NULL)
        return;
    if (strcmp(entityName, entity_name_to_match) != 0)
        return; /* different entity: leave the match flag alone */

    /* The cast to int is safe: the expected values are chosen by the
     * tests themselves and are far too short to overflow an int.
     */
    same_value = (value_length == (int)strlen(entity_value_to_match)
                  && strncmp(value, entity_value_to_match, value_length) == 0);
    entity_match_flag = same_value ? ENTITY_MATCH_SUCCESS : ENTITY_MATCH_FAIL;
}
/*
* Character & encoding tests.
*/
START_TEST(test_nul_byte)
{
char text[] = "<doc>\0</doc>";
/* test that a NUL byte (in US-ASCII data) is an error */
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
fail("Parser did not report error on NUL-byte.");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
}
END_TEST
START_TEST(test_u0000_char)
{
    /* Test that a character reference to U+0000 ("&#0;") is rejected
       with XML_ERROR_BAD_CHAR_REF.  Unlike test_nul_byte, the document
       contains no literal NUL byte, so the failure message names the
       character reference. */
    expect_failure("<doc>&#0;</doc>",
                   XML_ERROR_BAD_CHAR_REF,
                   "Parser did not report error on NUL character reference.");
}
END_TEST
/* Run the self-test that the SipHash implementation provides. */
START_TEST(test_siphash_self)
{
    const int self_test_ok = sip24_valid();

    if (! self_test_ok)
        fail("SipHash self-test failed");
}
END_TEST
/* Check SipHash-2-4 against the reference value for a 15-byte message,
   both through the incremental interface and the one-shot wrapper. */
START_TEST(test_siphash_spec)
{
    /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
    const char input[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
                         "\x0a\x0b\x0c\x0d\x0e";
    const size_t input_len = sizeof(input) - 1;
    const size_t first_part = 4;
    const uint64_t want = _SIP_ULL(0xa129ca61U, 0x49be45e5U);
    struct sipkey key;
    struct siphash state;

    /* Reference sip_tobin so that it does not count as unused */
    (void)sip_tobin;

    sip_tokey(&key,
              "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
              "\x0a\x0b\x0c\x0d\x0e\x0f");
    sip24_init(&state, &key);

    /* Feed the message in two pieces to cover input spread across calls */
    sip24_update(&state, input, first_part);
    sip24_update(&state, input + first_part, input_len - first_part);

    /* A zero-length update is covered as well */
    sip24_update(&state, input, 0);

    if (sip24_final(&state) != want)
        fail("sip24_final failed spec test\n");

    /* The one-shot wrapper has to produce the same value */
    if (siphash24(input, input_len, &key) != want)
        fail("siphash24 failed spec test\n");
}
END_TEST
START_TEST(test_bom_utf8)
{
/* This test is really just making sure we don't core on a UTF-8 BOM. */
const char *text = "\357\273\277<e/>";
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_bom_utf16_be)
{
char text[] = "\376\377\0<\0e\0/\0>";
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
START_TEST(test_bom_utf16_le)
{
char text[] = "\377\376<\0e\0/\0>\0";
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Parse whole buffer at once to exercise a different code path */
START_TEST(test_nobom_utf16_le)
{
char text[] = " \0<\0e\0/\0>\0";
if (XML_Parse(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Character data handler: appends the `len` characters at `s` to the
   CharData object passed as user data. */
static void XMLCALL
accumulate_characters(void *userData, const XML_Char *s, int len)
{
    CharData *storage = (CharData *)userData;

    CharData_AppendXMLChars(storage, s, len);
}
/* Start-element handler: appends attribute values to the CharData
   object passed as user data.  Name/value pairs are consumed only
   while storage->count is still negative, so appending stops as soon
   as the count is no longer negative. */
static void XMLCALL
accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
                     const XML_Char **atts)
{
    CharData *storage = (CharData *)userData;
    const XML_Char **pair;

    /* No attribute array at all: nothing to do */
    if (atts == NULL)
        return;

    /* atts holds name/value pairs and ends with a NULL name */
    for (pair = atts; storage->count < 0 && pair[0] != NULL; pair += 2) {
        /* "accumulate" the value of the first attribute we see */
        CharData_AppendXMLChars(storage, pair[1], -1);
    }
}
static void
_run_character_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
#define run_character_check(text, expected) \
_run_character_check(text, expected, __FILE__, __LINE__)
static void
_run_attribute_check(const XML_Char *text, const XML_Char *expected,
const char *file, int line)
{
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetStartElementHandler(parser, accumulate_attribute);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
_xml_failure(parser, file, line);
CharData_CheckXMLChars(&storage, expected);
}
#define run_attribute_check(text, expected) \
_run_attribute_check(text, expected, __FILE__, __LINE__)
/* User data for tests that collect character data through
 * ext_accumulate_characters().
 */
typedef struct ExtTest {
/* Text for the test to parse; not read by the helpers visible here */
const char *parse_text;
/* Encoding name; not read by the helpers visible here */
const char *encoding;
/* Where ext_accumulate_characters() appends character data; set by
 * _run_ext_character_check()
 */
CharData *storage;
} ExtTest;
/* Character data handler for parsers whose user data is an ExtTest:
   forwards the characters to the CharData the ExtTest points at. */
static void XMLCALL
ext_accumulate_characters(void *userData, const XML_Char *s, int len)
{
    ExtTest *ext = (ExtTest *)userData;
    CharData *target = ext->storage;

    accumulate_characters(target, s, len);
}
/* Like a character check, but with `test_data` installed as the
   parser's user data: a local CharData is hooked into
   test_data->storage, `text` is parsed byte by byte as a complete
   document and the collected character data is compared with
   `expected`.  A parse error is reported against `file` and `line`. */
static void
_run_ext_character_check(const XML_Char *text,
                         ExtTest *test_data,
                         const XML_Char *expected,
                         const char *file, int line)
{
    CharData collected;
    enum XML_Status status;

    CharData_Init(&collected);
    test_data->storage = &collected;
    XML_SetUserData(parser, test_data);
    XML_SetCharacterDataHandler(parser, ext_accumulate_characters);

    status = _XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE);
    if (status == XML_STATUS_ERROR)
        _xml_failure(parser, file, line);
    CharData_CheckXMLChars(&collected, expected);
}

/* Call-site wrapper that supplies file and line. */
#define run_ext_character_check(text, test_data, expected) \
    _run_ext_character_check(text, test_data, expected, __FILE__, __LINE__)
/* Regression test for SF bug #491986: Danish text in an ISO-8859-1
   document must be delivered as the corresponding UTF-8 bytes. */
START_TEST(test_danish_latin1)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
    const char *as_utf8 =
        "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85";

    run_character_check(document, as_utf8);
}
END_TEST
/* Regression test for SF bug #514281: hexadecimal character references
   in an ISO-8859-1 document must be delivered as UTF-8. */
START_TEST(test_french_charref_hexidecimal)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
    const char *as_utf8 =
        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88";

    run_character_check(document, as_utf8);
}
END_TEST
/* Decimal character references in an ISO-8859-1 document must be
   delivered as UTF-8. */
START_TEST(test_french_charref_decimal)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
    const char *as_utf8 =
        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88";

    run_character_check(document, as_utf8);
}
END_TEST
/* Raw ISO-8859-1 bytes in character data must be delivered as UTF-8. */
START_TEST(test_french_latin1)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
    const char *as_utf8 =
        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88";

    run_character_check(document, as_utf8);
}
END_TEST
/* A two-byte UTF-8 character in a UTF-8 document must be passed
   through unchanged. */
START_TEST(test_french_utf8)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-8'?>\n"
        "<doc>\xC3\xA9</doc>";
    const char *as_utf8 = "\xC3\xA9";

    run_character_check(document, as_utf8);
}
END_TEST
/* Regression test for SF bug #600479: the three-byte sequence
   EF BA BF must be accepted and passed through unchanged.
   XXX There should be a test that exercises all legal XML Unicode
   characters as PCDATA and attribute value content, and XML Name
   characters as part of element and attribute names.
*/
START_TEST(test_utf8_false_rejection)
{
    const char *document = "<doc>\xEF\xBA\xBF</doc>";
    const char *as_utf8 = "\xEF\xBA\xBF";

    run_character_check(document, as_utf8);
}
END_TEST
/* Regression test for SF bug #477667.
This test assures that any 8-bit character followed by a 7-bit
character will not be mistakenly interpreted as a valid UTF-8
sequence.
*/
START_TEST(test_illegal_utf8)
{
char text[100];
int i;
for (i = 128; i <= 255; ++i) {
sprintf(text, "<e>%ccd</e>", i);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
sprintf(text,
"expected token error for '%c' (ordinal %d) in UTF-8 text",
i, i);
fail(text);
}
else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
/* Reset the parser since we use the same parser repeatedly. */
XML_ParserReset(parser, NULL);
}
}
END_TEST
/* Examples, not masks: each macro is a one-byte string literal holding
   a sample byte, used to assemble the inputs of test_utf8_auto_align by
   string concatenation.  The trailing comments show the byte in
   binary. */
#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
/* Table-driven check of align_limit_to_full_utf8_characters(): for each
   input, the limit pointer starts at the end of the string and must be
   moved by exactly the expected number of chars (zero or negative).
   Every mismatch is printed; the test fails if there was at least one. */
START_TEST(test_utf8_auto_align)
{
    struct TestCase {
        ptrdiff_t expectedMovementInChars;
        const char * input;
    };

    struct TestCase cases[] = {
        { 0, ""},
        { 0, UTF8_LEAD_1},
        {-1, UTF8_LEAD_2},
        { 0, UTF8_LEAD_2 UTF8_FOLLOW},
        {-1, UTF8_LEAD_3},
        {-2, UTF8_LEAD_3 UTF8_FOLLOW},
        { 0, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
        {-1, UTF8_LEAD_4},
        {-2, UTF8_LEAD_4 UTF8_FOLLOW},
        {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
        { 0, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
    };
    const size_t caseCount = sizeof(cases) / sizeof(cases[0]);
    bool allPassed = true;
    size_t idx;

    for (idx = 0; idx < caseCount; idx++) {
        const char * const input = cases[idx].input;
        const size_t inputLen = strlen(input);
        const char * const limitBefore = input + inputLen;
        const char * limit = limitBefore;
        ptrdiff_t moved;
        size_t k;

        align_limit_to_full_utf8_characters(input, &limit);
        moved = limit - limitBefore;
        if (moved == cases[idx].expectedMovementInChars)
            continue;

        /* Mismatch: remember it and dump the case, input in hex */
        allPassed = false;
        printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
               ", actually moved by %2d chars: \"",
               (unsigned)(idx + 1),
               (int)cases[idx].expectedMovementInChars,
               (int)moved);
        for (k = 0; k < inputLen; k++)
            printf("\\x%02x", (unsigned char)input[k]);
        printf("\"\n");
    }

    if (! allPassed)
        fail("UTF-8 auto-alignment is not bullet-proof\n");
}
END_TEST
START_TEST(test_utf16)
{
/* <?xml version="1.0" encoding="UTF-16"?>
* <doc a='123'>some {A} text</doc>
*
* where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
*/
char text[] =
"\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
"\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
"\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
"\000'\000?\000>\000\n"
"\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
"\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
"<\000/\000d\000o\000c\000>";
char expected[] = "some \357\274\241 text";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Parse a low-byte-first two-byte document whose epilog consists of
   CR/LF pairs in two chunks; the first chunk ends after 17 of the 18
   bytes. */
START_TEST(test_utf16_le_epilog_newline)
{
    char text[] =
        "\xFF\xFE"                  /* BOM */
        "<\000e\000/\000>\000"      /* document element */
        "\r\000\n\000\r\000\n\000"; /* epilog */
    const unsigned int first_chunk_bytes = 17;
    const size_t total_bytes = sizeof(text) - 1;
    enum XML_Status rc;

    /* The split point has to lie inside the document */
    if (first_chunk_bytes >= total_bytes)
        fail("bad value of first_chunk_bytes");

    rc = _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE);
    if (rc == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
    else {
        rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
                                     total_bytes - first_chunk_bytes,
                                     XML_TRUE);
        if (rc == XML_STATUS_ERROR)
            xml_failure(parser);
    }
}
END_TEST
/* A document that declares UTF-16 but is not encoded that way must be
   faulted with XML_ERROR_INCORRECT_ENCODING. */
START_TEST(test_not_utf16)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-16'?>"
        "<doc>Hi</doc>";

    /* Install an XML declaration handler to provoke the appropriate
       code paths */
    XML_SetXmlDeclHandler(parser, dummy_xdecl_handler);
    expect_failure(document,
                   XML_ERROR_INCORRECT_ENCODING,
                   "UTF-16 declared in UTF-8 not faulted");
}
END_TEST
/* An encoding name the parser does not know, set through
   XML_SetEncoding(), must make parsing fail with
   XML_ERROR_UNKNOWN_ENCODING. */
START_TEST(test_bad_encoding)
{
    const char *document = "<doc>Hi</doc>";

    /* Setting the name itself is expected to succeed */
    if (!XML_SetEncoding(parser, "unknown-encoding"))
        fail("XML_SetEncoding failed");
    expect_failure(document,
                   XML_ERROR_UNKNOWN_ENCODING,
                   "Unknown encoding not faulted");
}
END_TEST
/* Regression test for SF bug #481609, #774028.
   Umlauts given as raw ISO-8859-1 bytes and as decimal and hexadecimal
   character references must come out as UTF-8, both in character data
   and in an attribute value, with and without a default handler. */
START_TEST(test_latin1_umlauts)
{
    const char *document =
        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
        " >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
    const char *as_utf8 =
        "\xC3\xA4 \xC3\xB6 \xC3\xBC "
        "\xC3\xA4 \xC3\xB6 \xC3\xBC "
        "\xC3\xA4 \xC3\xB6 \xC3\xBC >";

    /* Without a default handler: character data, then attribute */
    run_character_check(document, as_utf8);
    XML_ParserReset(parser, NULL);
    run_attribute_check(document, as_utf8);

    /* The same two checks again with a default handler installed; the
       handler is set again after each reset */
    XML_ParserReset(parser, NULL);
    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_character_check(document, as_utf8);
    XML_ParserReset(parser, NULL);
    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_attribute_check(document, as_utf8);
}
END_TEST
/* An element name containing a 4-byte UTF-8 character must be rejected
   with XML_ERROR_INVALID_TOKEN. */
START_TEST(test_long_utf8_character)
{
    const char *document =
        "<?xml version='1.0' encoding='utf-8'?>\n"
        /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
        "<do\xf0\x90\x80\x80/>";

    expect_failure(document,
                   XML_ERROR_INVALID_TOKEN,
                   "4-byte UTF-8 character in element name not faulted");
}
END_TEST
/* Test that a long latin-1 attribute (too long to convert in one go)
 * is correctly converted
 */
START_TEST(test_long_latin1_attribute)
{
/* ISO-8859-1 document whose attribute value is 1023 ASCII letters
 * (15 rows of 64 plus one row of 63) followed by the single byte 0xE4.
 */
const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>\n"
"<doc att='"
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
/* Last character splits across a buffer boundary */
"\xe4'>\n</doc>";
/* The same 1023 letters followed by the two bytes C3 A4, which is what
 * the attribute check must find.
 */
const char *expected =
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
"\xc3\xa4";
run_attribute_check(text, expected);
}
END_TEST
/* Test that a long ASCII attribute (too long to convert in one go)
 * is correctly converted
 */
START_TEST(test_long_ascii_attribute)
{
/* us-ascii document whose attribute value is 16 rows of 64 letters
 * (1024 characters) followed by "01234".
 */
const char *text =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<doc att='"
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"01234'>\n</doc>";
/* The attribute value must come back unchanged */
const char *expected =
/* 64 characters per line */
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"01234";
run_attribute_check(text, expected);
}
END_TEST
/* Regression test #1 for SF bug #653180. */
START_TEST(test_line_number_after_parse)
{
const char *text =
"<tag>\n"
"\n"
"\n</tag>";
XML_Size lineno;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
lineno = XML_GetCurrentLineNumber(parser);
if (lineno != 4) {
char buffer[100];
sprintf(buffer,
"expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
fail(buffer);
}
}
END_TEST
/* Regression test #2 for SF bug #653180. */
START_TEST(test_column_number_after_parse)
{
const char *text = "<tag></tag>";
XML_Size colno;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
colno = XML_GetCurrentColumnNumber(parser);
if (colno != 11) {
char buffer[100];
sprintf(buffer,
"expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
fail(buffer);
}
}
END_TEST
/* Start-element handler that logs "<name> at col:C line:L\n" to the
   CharData object passed as user data.  Column and line are read from
   the global parser.
   The line is formatted with snprintf() so that a long element name is
   truncated instead of overflowing the 100-byte buffer. */
static void XMLCALL
start_element_event_handler2(void *userData, const XML_Char *name,
                             const XML_Char **UNUSED_P(attr))
{
    CharData *storage = (CharData *) userData;
    char buffer[100];

    snprintf(buffer, sizeof(buffer),
             "<%s> at col:%" XML_FMT_INT_MOD "u line:%"
             XML_FMT_INT_MOD "u\n", name,
             XML_GetCurrentColumnNumber(parser),
             XML_GetCurrentLineNumber(parser));
    CharData_AppendString(storage, buffer);
}
/* End-element handler that logs "</name> at col:C line:L\n" to the
   CharData object passed as user data.  Column and line are read from
   the global parser.
   The line is formatted with snprintf() so that a long element name is
   truncated instead of overflowing the 100-byte buffer. */
static void XMLCALL
end_element_event_handler2(void *userData, const XML_Char *name)
{
    CharData *storage = (CharData *) userData;
    char buffer[100];

    snprintf(buffer, sizeof(buffer),
             "</%s> at col:%" XML_FMT_INT_MOD "u line:%"
             XML_FMT_INT_MOD "u\n", name,
             XML_GetCurrentColumnNumber(parser),
             XML_GetCurrentLineNumber(parser));
    CharData_AppendString(storage, buffer);
}
/* Regression test #3 for SF bug #653180.
   Checks the line and column numbers seen from inside start- and
   end-element handlers for a document that mixes "\n", "\r\n" and "\r"
   line endings.
   The indentation inside the document is significant: nested tags are
   indented by two and four spaces so that they start at the columns
   listed in `expected` (col:2 and col:4). */
START_TEST(test_line_and_column_numbers_inside_handlers)
{
    const char *text =
        "<a>\n"        /* Unix end-of-line */
        "  <b>\r\n"    /* Windows end-of-line */
        "    <c/>\r"   /* Mac OS end-of-line */
        "  </b>\n"
        "  <d>\n"
        "    <f/>\n"
        "  </d>\n"
        "</a>";
    const char *expected =
        "<a> at col:0 line:1\n"
        "<b> at col:2 line:2\n"
        "<c> at col:4 line:3\n"
        "</c> at col:8 line:3\n"
        "</b> at col:2 line:4\n"
        "<d> at col:2 line:5\n"
        "<f> at col:4 line:6\n"
        "</f> at col:8 line:6\n"
        "</d> at col:2 line:7\n"
        "</a> at col:0 line:8\n";
    CharData storage;

    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler2);
    XML_SetEndElementHandler(parser, end_element_event_handler2);
    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
        xml_failure(parser);
    CharData_CheckString(&storage, expected);
}
END_TEST
/* Regression test #4 for SF bug #653180. */
START_TEST(test_line_number_after_error)
{
const char *text =
"<a>\n"
" <b>\n"
" </a>"; /* missing </b> */
XML_Size lineno;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
fail("Expected a parse error");
lineno = XML_GetCurrentLineNumber(parser);
if (lineno != 3) {
char buffer[100];
sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
fail(buffer);
}
}
END_TEST
/* Regression test #5 for SF bug #653180: after a tag-mismatch error
   the current column number must point into the offending end tag.
   The indentation inside the document is significant: "</a>" is
   indented by two spaces, which places the name "a" at (0-based)
   column 4, the value this test expects. */
START_TEST(test_column_number_after_error)
{
    const char *text =
        "<a>\n"
        "  <b>\n"
        "  </a>";  /* missing </b> */
    XML_Size colno;

    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
        fail("Expected a parse error");
    colno = XML_GetCurrentColumnNumber(parser);
    if (colno != 4) {
        char buffer[100];

        sprintf(buffer,
                "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
        fail(buffer);
    }
}
END_TEST
/* Regression test for SF bug #478332. */
START_TEST(test_really_long_lines)
{
/* This parses an input line longer than INIT_DATA_BUF_SIZE
   characters long (defined to be 1024 in xmlparse.c). We take a
   really cheesy approach to building the input buffer, because
   this avoids writing bugs in buffer-filling code.
   The element content below is 17 rows of 64 characters, all on a
   single input line.
*/
const char *text =
"<e>"
/* 64 chars */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
/* until we have at least 1024 characters on the line: */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"</e>";
/* The test only requires that the parse succeeds */
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Test cdata processing across a buffer boundary */
START_TEST(test_really_long_encoded_lines)
{
/* As above, except that we want to provoke an output buffer
* overflow with a non-trivial encoding. For this we need to pass
* the whole cdata in one go, not byte-by-byte.
*/
void *buffer;
const char *text =
"<?xml version='1.0' encoding='iso-8859-1'?>"
"<e>"
/* 64 chars */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
/* until we have at least 1024 characters on the line: */
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
"</e>";
int parse_len = strlen(text);
/* Need a cdata handler to provoke the code path we want to test */
XML_SetCharacterDataHandler(parser, dummy_cdata_handler);
buffer = XML_GetBuffer(parser, parse_len);
if (buffer == NULL)
fail("Could not allocate parse buffer");
memcpy(buffer, text, parse_len);
if (XML_ParseBuffer(parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/*
* Element event tests.
*/
/* Start-element handler: appends the element name to the CharData
   accumulator passed as user data.  The attribute list is ignored.
*/
static void XMLCALL
start_element_event_handler(void *userData,
                            const XML_Char *name,
                            const XML_Char **UNUSED_P(atts))
{
    CharData *storage = (CharData *)userData;

    CharData_AppendXMLChars(storage, name, -1);
}
/* End-element handler: appends "/" followed by the element name to the
   CharData accumulator passed as user data.
*/
static void XMLCALL
end_element_event_handler(void *userData, const XML_Char *name)
{
    CharData *const accumulator = (CharData *)userData;

    CharData_AppendString(accumulator, "/");
    CharData_AppendXMLChars(accumulator, name, -1);
}
START_TEST(test_end_element_events)
{
const char *text = "<a><b><c/></b><d><f/></d></a>";
const char *expected = "/c/b/f/d/a";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetEndElementHandler(parser, end_element_event_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, expected);
}
END_TEST
/*
* Attribute tests.
*/
/* Helper for the attribute whitespace test, which checks "attr", "ents"
   and "refs" attributes.

   Returns 1 if the string 's' looks like a normalized attribute value
   and 0 otherwise.  Tabs, newlines and carriage returns are never
   acceptable.  When 'is_cdata' is zero, leading blanks, trailing blanks
   and runs of more than one blank between words are rejected as well;
   CDATA attributes do not need multiple whitespace characters collapsed
   to a single space, so for them any number of blanks is accepted.
   (Section 3.3.3 of the recommendation.)
*/
static int
is_whitespace_normalized(const XML_Char *s, int is_cdata)
{
    const XML_Char *cursor;
    int pending_blanks = 0;  /* blanks seen since the last non-blank */
    int seen_text = 0;       /* non-zero once a non-blank has been seen */

    for (cursor = s; *cursor; cursor++) {
        switch (*cursor) {
        case ' ':
            pending_blanks++;
            break;

        case '\t':
        case '\n':
        case '\r':
            /* Normalization turns all of these into spaces. */
            return 0;

        default:
            if (!is_cdata) {
                /* illegal leading blanks */
                if (!seen_text && pending_blanks != 0)
                    return 0;
                /* more than one blank between two words */
                if (seen_text && pending_blanks > 1)
                    return 0;
            }
            seen_text = 1;
            pending_blanks = 0;
            break;
        }
    }

    /* Anything still pending here is trailing blanks. */
    return (pending_blanks != 0 && !is_cdata) ? 0 : 1;
}
/* Check the attribute whitespace checker.

   Each case is run twice: with is_cdata == 0, where leading, trailing
   and doubled blanks must be rejected, and with is_cdata == 1, where
   only tab, newline and carriage return are rejected.
*/
static void
testhelper_is_whitespace_normalized(void)
{
    /* No blanks, and single blanks between words: always normalized */
    assert(is_whitespace_normalized("abc", 0));
    assert(is_whitespace_normalized("abc", 1));
    assert(is_whitespace_normalized("abc def ghi", 0));
    assert(is_whitespace_normalized("abc def ghi", 1));

    /* Leading blank */
    assert(!is_whitespace_normalized(" abc def ghi", 0));
    assert(is_whitespace_normalized(" abc def ghi", 1));

    /* Doubled blank between words.  The string must really contain two
       consecutive spaces: with a single space this case would contradict
       the "abc def ghi" assertion above and abort unconditionally.
    */
    assert(!is_whitespace_normalized("abc  def ghi", 0));
    assert(is_whitespace_normalized("abc  def ghi", 1));

    /* Trailing blank */
    assert(!is_whitespace_normalized("abc def ghi ", 0));
    assert(is_whitespace_normalized("abc def ghi ", 1));

    /* Nothing but a blank */
    assert(!is_whitespace_normalized(" ", 0));
    assert(is_whitespace_normalized(" ", 1));

    /* Whitespace other than the space character is never normalized */
    assert(!is_whitespace_normalized("\t", 0));
    assert(!is_whitespace_normalized("\t", 1));
    assert(!is_whitespace_normalized("\n", 0));
    assert(!is_whitespace_normalized("\n", 1));
    assert(!is_whitespace_normalized("\r", 0));
    assert(!is_whitespace_normalized("\r", 1));
    assert(!is_whitespace_normalized("abc\t def", 1));
}
/* Start-element handler: fails the test if the value of any attribute
   named "attr", "ents" or "refs" is not whitespace-normalized under the
   non-CDATA rules.  'atts' is walked as name/value pairs up to the first
   NULL name.
*/
static void XMLCALL
check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
                                          const XML_Char *UNUSED_P(name),
                                          const XML_Char **atts)
{
    const XML_Char **pair;

    for (pair = atts; pair[0] != NULL; pair += 2) {
        const XML_Char *attrname = pair[0];
        const XML_Char *value = pair[1];
        int must_check = (strcmp("attr", attrname) == 0
                          || strcmp("ents", attrname) == 0
                          || strcmp("refs", attrname) == 0);

        if (!must_check)
            continue;
        if (is_whitespace_normalized(value, 0))
            continue;

        {
            char buffer[256];

            sprintf(buffer, "attribute value not normalized: %s='%s'",
                    attrname, value);
            fail(buffer);
        }
    }
}
START_TEST(test_attr_whitespace_normalization)
{
const char *text =
"<!DOCTYPE doc [\n"
" <!ATTLIST doc\n"
" attr NMTOKENS #REQUIRED\n"
" ents ENTITIES #REQUIRED\n"
" refs IDREFS #REQUIRED>\n"
"]>\n"
"<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n"
" ents=' ent-1 \t\r\n"
" ent-2 ' >\n"
" <e id='id-1'/>\n"
" <e id='id-2'/>\n"
"</doc>";
XML_SetStartElementHandler(parser,
check_attr_contains_normalized_whitespace);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/*
* XML declaration tests.
*/
/* An XML declaration that is not at the very start of the document (here
   it follows a newline) must be reported as a misplaced XML PI.
*/
START_TEST(test_xmldecl_misplaced)
{
    const char *doc =
        "\n"
        "<?xml version='1.0'?>\n"
        "<a/>";

    expect_failure(doc,
                   XML_ERROR_MISPLACED_XML_PI,
                   "failed to report misplaced XML declaration");
}
END_TEST
/* An XML declaration containing the stray bytes \xc3\xa7 after the
   version attribute must be rejected.
*/
START_TEST(test_xmldecl_invalid)
{
    const char *doc = "<?xml version='1.0' \xc3\xa7?>\n<doc/>";

    expect_failure(doc,
                   XML_ERROR_XML_DECL,
                   "Failed to report invalid XML declaration");
}
END_TEST
/* An XML declaration with a value but no attribute name must be
   rejected.
*/
START_TEST(test_xmldecl_missing_attr)
{
    const char *doc = "<?xml ='1.0'?>\n<doc/>\n";

    expect_failure(doc,
                   XML_ERROR_XML_DECL,
                   "Failed to report missing XML declaration attribute");
}
END_TEST
/* An XML declaration whose 'standalone' attribute has no value must be
   rejected.
*/
START_TEST(test_xmldecl_missing_value)
{
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii' standalone?>\n"
        "<doc/>";

    expect_failure(doc,
                   XML_ERROR_XML_DECL,
                   "Failed to report missing attribute value");
}
END_TEST
/* Regression test for SF bug #584832. */

/* Unknown-encoding handler that accepts only "unsupported-encoding".
   For that name it fills 'info' with an identity map (byte value i maps
   to i), clears the data/convert/release members and returns
   XML_STATUS_OK; any other name yields XML_STATUS_ERROR with 'info'
   untouched.
*/
static int XMLCALL
UnknownEncodingHandler(void *UNUSED_P(data),
                       const XML_Char *encoding,
                       XML_Encoding *info)
{
    int code;

    if (strcmp(encoding, "unsupported-encoding") != 0)
        return XML_STATUS_ERROR;

    for (code = 0; code < 256; code++) {
        info->map[code] = code;
    }
    info->data = NULL;
    info->convert = NULL;
    info->release = NULL;
    return XML_STATUS_OK;
}
START_TEST(test_unknown_encoding_internal_entity)
{
const char *text =
"<?xml version='1.0' encoding='unsupported-encoding'?>\n"
"<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
"<test a='&foo;'/>";
XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Test unrecognised encoding handler */
static void dummy_release(void *UNUSED_P(data))
{
}
/* Unknown-encoding handler that rejects every encoding: it always
   returns XML_STATUS_ERROR.  Before doing so it sets the data, convert
   and release members of 'info' (release to dummy_release); the map is
   left untouched.
*/
static int XMLCALL
UnrecognisedEncodingHandler(void *UNUSED_P(data),
                            const XML_Char *UNUSED_P(encoding),
                            XML_Encoding *info)
{
    info->release = dummy_release;
    info->convert = NULL;
    info->data = NULL;

    return XML_STATUS_ERROR;
}
START_TEST(test_unrecognised_encoding_internal_entity)
{
const char *text =
"<?xml version='1.0' encoding='unsupported-encoding'?>\n"
"<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
"<test a='&foo;'/>";
XML_SetUnknownEncodingHandler(parser,
UnrecognisedEncodingHandler,
NULL);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
fail("Unrecognised encoding not rejected");
}
END_TEST
/* Regression test for SF bug #620106. */

/* External entity reference handler driven by the ExtTest structure held
   in the parser's user data.  It creates an external entity parser for
   'context', applies test_data->encoding through XML_SetEncoding() when
   one is given, and parses test_data->parse_text with it.  A parse error
   is reported through xml_failure(); on success the external parser is
   freed and XML_STATUS_OK is returned.
*/
static int XMLCALL
external_entity_loader(XML_Parser parser,
                       const XML_Char *context,
                       const XML_Char *UNUSED_P(base),
                       const XML_Char *UNUSED_P(systemId),
                       const XML_Char *UNUSED_P(publicId))
{
    ExtTest *test_data = (ExtTest *)XML_GetUserData(parser);
    XML_Parser extparser = XML_ExternalEntityParserCreate(parser,
                                                          context,
                                                          NULL);
    enum XML_Status status;

    if (extparser == NULL) {
        fail("Could not create external entity parser.");
    }

    if (test_data->encoding != NULL
        && !XML_SetEncoding(extparser, test_data->encoding)) {
        fail("XML_SetEncoding() ignored for external entity");
    }

    status = _XML_Parse_SINGLE_BYTES(extparser,
                                     test_data->parse_text,
                                     (int)strlen(test_data->parse_text),
                                     XML_TRUE);
    if (status == XML_STATUS_ERROR) {
        xml_failure(extparser);
        return XML_STATUS_ERROR;
    }

    XML_ParserFree(extparser);
    return XML_STATUS_OK;
}
/* The encoding given to XML_SetEncoding() on an external entity parser
   must be used for the entity text, which here carries a conflicting
   encoding declaration.
*/
START_TEST(test_ext_entity_set_encoding)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    ExtTest entity = {
        /* This text says it's an unsupported encoding, but it's really
           UTF-8, which we tell Expat using XML_SetEncoding().
        */
        "<?xml encoding='iso-8859-3'?>\xC3\xA9",
        "utf-8",
        NULL
    };

    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(doc, &entity, "\xC3\xA9");
}
END_TEST
/* Test external entities with no handler: with only a default handler
   installed, the entity reference must produce no character data.
*/
START_TEST(test_ext_entity_no_handler)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    const char *no_characters = "";

    XML_SetDefaultHandler(parser, dummy_default_handler);
    run_character_check(doc, no_characters);
}
END_TEST
/* Test UTF-8 BOM is accepted at the start of an external entity whose
   encoding has been set with XML_SetEncoding().
*/
START_TEST(test_ext_entity_set_bom)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    ExtTest entity = {
        "\xEF\xBB\xBF" /* BOM */
        "<?xml encoding='iso-8859-3'?>"
        "\xC3\xA9",
        "utf-8",
        NULL
    };

    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(doc, &entity, "\xC3\xA9");
}
END_TEST
/* Test that bad encodings are faulted */
/* Description of one external entity that is expected to fail to parse;
   passed to external_entity_faulter() through the parser's user data.
*/
typedef struct ext_faults
{
const char *parse_text; /* entity text handed to the external parser */
const char *fail_text; /* failure message used if that parse does NOT fail */
const char *encoding; /* passed to XML_SetEncoding() unless NULL */
enum XML_Error error; /* error code the external parser must report */
} ExtFaults;
/* External entity reference handler for entities that are supposed to be
   broken.  The ExtFaults structure in the parser's user data supplies
   the entity text, an optional encoding, the message to fail with if the
   text unexpectedly parses, and the error code the external parser must
   end up with.  The handler always returns XML_STATUS_ERROR.
*/
static int XMLCALL
external_entity_faulter(XML_Parser parser,
                        const XML_Char *context,
                        const XML_Char *UNUSED_P(base),
                        const XML_Char *UNUSED_P(systemId),
                        const XML_Char *UNUSED_P(publicId))
{
    ExtFaults *fault = (ExtFaults *)XML_GetUserData(parser);
    XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser,
                                                           context,
                                                           NULL);
    enum XML_Status status;

    if (ext_parser == NULL) {
        fail("Could not create external entity parser");
    }

    if (fault->encoding != NULL
        && !XML_SetEncoding(ext_parser, fault->encoding)) {
        fail("XML_SetEncoding failed");
    }

    /* The parse has to fail ... */
    status = _XML_Parse_SINGLE_BYTES(ext_parser,
                                     fault->parse_text,
                                     (int)strlen(fault->parse_text),
                                     XML_TRUE);
    if (status != XML_STATUS_ERROR) {
        fail(fault->fail_text);
    }

    /* ... and it has to fail with the expected error code. */
    if (XML_GetErrorCode(ext_parser) != fault->error) {
        xml_failure(ext_parser);
    }

    XML_ParserFree(ext_parser);
    return XML_STATUS_ERROR;
}
/* An external entity parser that is given the encoding name "unknown"
   must report XML_ERROR_UNKNOWN_ENCODING, and the outer parse must then
   fail with an external entity handling error.
*/
START_TEST(test_ext_entity_bad_encoding)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    ExtFaults expected_fault = {
        "<?xml encoding='iso-8859-3'?>u",
        "Unsupported encoding not faulted",
        "unknown",
        XML_ERROR_UNKNOWN_ENCODING
    };

    XML_SetUserData(parser, &expected_fault);
    XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
    expect_failure(doc,
                   XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                   "Bad encoding should not have been accepted");
}
END_TEST
/* Try handing an invalid encoding to an external entity parser; this
   time the external entity is the external DTD subset.
*/
START_TEST(test_ext_entity_bad_encoding_2)
{
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtFaults expected_fault = {
        "<!ELEMENT doc (#PCDATA)*>",
        "Unknown encoding not faulted",
        "unknown-encoding",
        XML_ERROR_UNKNOWN_ENCODING
    };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetUserData(parser, &expected_fault);
    XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
    expect_failure(doc,
                   XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                   "Bad encoding not faulted in external entity handler");
}
END_TEST
/* Test that no error is reported for unknown entities if we don't
read an external subset. This was fixed in Expat 1.95.5.
*/
START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
const char *text =
"<!DOCTYPE doc SYSTEM 'foo'>\n"
"<doc>&entity;</doc>";
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Test that an error is reported for unknown entities if we don't
   have an external subset.
*/
START_TEST(test_wfc_undeclared_entity_no_external_subset) {
    const char *doc = "<doc>&entity;</doc>";

    expect_failure(doc,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity w/out a DTD.");
}
END_TEST
/* Test that an error is reported for unknown entities if we don't
   read an external subset, but have been declared standalone.
*/
START_TEST(test_wfc_undeclared_entity_standalone) {
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    const char *complaint =
        "Parser did not report undefined entity (standalone).";

    expect_failure(doc, XML_ERROR_UNDEFINED_ENTITY, complaint);
}
END_TEST
/* Test that an error is reported for unknown entities if we have read
   an external subset, and standalone is true.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    /* External subset: declares the element, but not the entity. */
    ExtTest subset = {
        "<!ELEMENT doc (#PCDATA)*>",
        NULL,
        NULL
    };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetUserData(parser, &subset);
    expect_failure(doc,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity (external DTD).");
}
END_TEST
/* Test that external entity handling is not done if the parsing flag
 * is set to UNLESS_STANDALONE and the document is standalone: the
 * entity declared in the external subset must remain undefined.
 */
START_TEST(test_entity_with_external_subset_unless_standalone) {
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest subset = { "<!ENTITY entity 'bar'>", NULL, NULL };

    XML_SetParamEntityParsing(parser,
                              XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetUserData(parser, &subset);
    expect_failure(doc,
                   XML_ERROR_UNDEFINED_ENTITY,
                   "Parser did not report undefined entity");
}
END_TEST
/* Test that no error is reported for unknown entities if we have read
   an external subset, and standalone is false.
*/
START_TEST(test_wfc_undeclared_entity_with_external_subset) {
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    /* External subset: declares the element, but not the entity. */
    ExtTest subset = {
        "<!ELEMENT doc (#PCDATA)*>",
        NULL,
        NULL
    };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    run_ext_character_check(doc, &subset, "");
}
END_TEST
/* Test that an error is reported if our NotStandalone handler fails */

/* NotStandalone handler that unconditionally reports failure. */
static int XMLCALL
reject_not_standalone_handler(void *UNUSED_P(userData))
{
    const int verdict = XML_STATUS_ERROR;

    return verdict;
}
/* A rejecting NotStandalone handler must make the parse fail with
   XML_ERROR_NOT_STANDALONE, both with and without external entity
   handling set up.
*/
START_TEST(test_not_standalone_handler_reject)
{
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest subset = {
        "<!ELEMENT doc (#PCDATA)*>",
        NULL,
        NULL
    };

    /* First pass: external entity handling enabled */
    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetUserData(parser, &subset);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler);
    expect_failure(doc,
                   XML_ERROR_NOT_STANDALONE,
                   "NotStandalone handler failed to reject");

    /* Second pass: try again but without external entity handling */
    XML_ParserReset(parser, NULL);
    XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler);
    expect_failure(doc,
                   XML_ERROR_NOT_STANDALONE,
                   "NotStandalone handler failed to reject");
}
END_TEST
/* Test that no error is reported if our NotStandalone handler succeeds */

/* NotStandalone handler that unconditionally reports success. */
static int XMLCALL
accept_not_standalone_handler(void *UNUSED_P(userData))
{
    const int verdict = XML_STATUS_OK;

    return verdict;
}
/* An accepting NotStandalone handler must let the parse complete, both
   with and without the external entity handler installed.
*/
START_TEST(test_not_standalone_handler_accept)
{
    const char *doc =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<!DOCTYPE doc SYSTEM 'foo'>\n"
        "<doc>&entity;</doc>";
    ExtTest subset = {
        "<!ELEMENT doc (#PCDATA)*>",
        NULL,
        NULL
    };

    /* First pass: with the external entity handler */
    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler);
    run_ext_character_check(doc, &subset, "");

    /* Second pass: repeat without the external entity handler */
    XML_ParserReset(parser, NULL);
    XML_SetNotStandaloneHandler(parser, accept_not_standalone_handler);
    run_character_check(doc, "");
}
END_TEST
/* An entity whose replacement text refers back to the entity itself
   (via the character reference &#38;) must be reported as recursive.
*/
START_TEST(test_wfc_no_recursive_entity_refs)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY entity '&#38;entity;'>\n"
        "]>\n"
        "<doc>&entity;</doc>";
    const char *complaint =
        "Parser did not report recursive entity reference.";

    expect_failure(doc, XML_ERROR_RECURSIVE_ENTITY_REF, complaint);
}
END_TEST
/* Test incomplete external entities are faulted */
START_TEST(test_ext_entity_invalid_parse)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
        "]>\n"
        "<doc>&en;</doc>";
    /* One entry per broken entity; a NULL parse_text ends the table. */
    const ExtFaults faults[] = {
        {
            "<",
            "Incomplete element declaration not faulted",
            NULL,
            XML_ERROR_UNCLOSED_TOKEN
        },
        {
            "<\xe2\x82", /* First two bytes of a three-byte char */
            "Incomplete character not faulted",
            NULL,
            XML_ERROR_PARTIAL_CHAR
        },
        {
            "<tag>\xe2\x82",
            "Incomplete character in CDATA not faulted",
            NULL,
            XML_ERROR_PARTIAL_CHAR
        },
        { NULL, NULL, NULL, XML_ERROR_NONE }
    };
    int i;

    for (i = 0; faults[i].parse_text != NULL; i++) {
        XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
        XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
        XML_SetUserData(parser, (void *)&faults[i]);
        expect_failure(doc,
                       XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                       "Parser did not report external entity error");
        /* Start the next case from a clean parser */
        XML_ParserReset(parser, NULL);
    }
}
END_TEST
/* Regression test for SF bug #483514.  With a dedicated handler set for
   every kind of DTD construct used below, the default handler is
   expected to see only the seven newlines and the "<doc/>" element.
*/
START_TEST(test_dtd_default_handling)
{
    const char *doc =
        "<!DOCTYPE doc [\n"
        "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
        "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
        "<!ELEMENT doc EMPTY>\n"
        "<!ATTLIST doc a CDATA #IMPLIED>\n"
        "<?pi in dtd?>\n"
        "<!--comment in dtd-->\n"
        "]><doc/>";
    const char *wanted = "\n\n\n\n\n\n\n<doc/>";

    /* Everything not claimed by a handler below is accumulated */
    XML_SetDefaultHandler(parser, accumulate_characters);

    /* Declaration handlers */
    XML_SetStartDoctypeDeclHandler(parser, dummy_start_doctype_handler);
    XML_SetEndDoctypeDeclHandler(parser, dummy_end_doctype_handler);
    XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler);
    XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler);
    XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
    XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler);

    /* Processing instruction, comment and CDATA section handlers */
    XML_SetProcessingInstructionHandler(parser, dummy_pi_handler);
    XML_SetCommentHandler(parser, dummy_comment_handler);
    XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler);
    XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler);

    run_character_check(doc, wanted);
}
END_TEST
/* Test handling of attribute declarations */
/* One attribute-declaration test case: a piece of document text plus the
   values verify_attlist_decl_handler() expects to be called with.
*/
typedef struct AttTest {
const XML_Char *definition; /* document text parsed after the common prolog */
const XML_Char *element_name; /* expected element name */
const XML_Char *attr_name; /* expected attribute name */
const XML_Char *attr_type; /* expected attribute type string */
const XML_Char *default_value; /* expected default value; NULL if none expected */
int is_required; /* expected is_required flag */
} AttTest;
/* Attlist declaration handler that compares every argument with the
   corresponding member of the AttTest structure passed as user data and
   fails the test on the first kind of mismatch it meets.
*/
static void XMLCALL
verify_attlist_decl_handler(void *userData,
                            const XML_Char *element_name,
                            const XML_Char *attr_name,
                            const XML_Char *attr_type,
                            const XML_Char *default_value,
                            int is_required)
{
    const AttTest *expected = (AttTest *)userData;
    const int got_default = (default_value != NULL);
    const int want_default = (expected->default_value != NULL);

    if (strcmp(element_name, expected->element_name) != 0) {
        fail("Unexpected element name in attribute declaration");
    }
    if (strcmp(attr_name, expected->attr_name) != 0) {
        fail("Unexpected attribute name in attribute declaration");
    }
    if (strcmp(attr_type, expected->attr_type) != 0) {
        fail("Unexpected attribute type in attribute declaration");
    }

    /* The default must be absent on both sides, or present on both and
       textually equal. */
    if (got_default != want_default) {
        fail("Unexpected default value in attribute declaration");
    }
    else if (got_default
             && strcmp(default_value, expected->default_value) != 0) {
        fail("Unexpected default value in attribute declaration");
    }

    if (is_required != expected->is_required) {
        fail("Requirement mismatch in attribute declaration");
    }
}
/* Run a table of attribute declarations through the parser.  Each case
   is parsed as the common prolog followed by the case's own text, with
   verify_attlist_decl_handler() checking what the parser reports.
*/
START_TEST(test_dtd_attr_handling)
{
    const char *prolog =
        "<!DOCTYPE doc [\n"
        "<!ELEMENT doc EMPTY>\n";
    /* A NULL definition ends the table. */
    AttTest attr_data[] = {
        {
            "<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
            "]>"
            "<doc a='two'/>",
            "doc",
            "a",
            "(one|two|three)", /* Extraneous spaces will be removed */
            NULL,
            XML_TRUE
        },
        {
            "<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
            "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
            "]>"
            "<doc/>",
            "doc",
            "a",
            "NOTATION(foo)",
            NULL,
            XML_FALSE
        },
        {
            "<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
            "]>"
            "<doc/>",
            "doc",
            "a",
            "NOTATION(foo)",
            "bar",
            XML_FALSE
        },
        {
            "<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
            "]>"
            "<doc/>",
            "doc",
            "a",
            "CDATA",
            "\xdb\xb2",
            XML_FALSE
        },
        { NULL, NULL, NULL, NULL, NULL, XML_FALSE }
    };
    int i;

    for (i = 0; attr_data[i].definition != NULL; i++) {
        AttTest *current = &attr_data[i];
        enum XML_Status status;

        XML_SetAttlistDeclHandler(parser, verify_attlist_decl_handler);
        XML_SetUserData(parser, current);

        /* Common prolog first, not final ... */
        status = _XML_Parse_SINGLE_BYTES(parser, prolog,
                                         (int)strlen(prolog), XML_FALSE);
        if (status == XML_STATUS_ERROR) {
            xml_failure(parser);
        }

        /* ... then the case-specific remainder of the document. */
        status = _XML_Parse_SINGLE_BYTES(parser,
                                         current->definition,
                                         (int)strlen(current->definition),
                                         XML_TRUE);
        if (status == XML_STATUS_ERROR) {
            xml_failure(parser);
        }

        XML_ParserReset(parser, NULL);
    }
}
END_TEST
/* See related SF bug #673791.
When namespace processing is enabled, setting the namespace URI for
a prefix is not allowed; this test ensures that it *is* allowed
when namespace processing is not enabled.
(See Namespaces in XML, section 2.)
*/
START_TEST(test_empty_ns_without_namespaces)
{
const char *text =
"<doc xmlns:prefix='http://example.org/'>\n"
" <e xmlns:prefix=''/>\n"
"</doc>";
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Regression test for SF bug #824420.
Checks that an xmlns:prefix attribute set in an attribute's default
value isn't misinterpreted.
*/
START_TEST(test_ns_in_attribute_default_without_namespaces)
{
const char *text =
"<!DOCTYPE e:element [\n"
" <!ATTLIST e:element\n"
" xmlns:e CDATA 'http://example.org/'>\n"
" ]>\n"
"<e:element/>";
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Shared input for the stop/suspend parser tests below: an ISO-8859-1
   document whose single element <s> contains 1200 digits of character
   data (20 lines of 60).
*/
static const char *long_character_data_text =
"<?xml version='1.0' encoding='iso-8859-1'?><s>"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"</s>";
static XML_Bool resumable = XML_FALSE;
static void
clearing_aborting_character_handler(void *UNUSED_P(userData),
const XML_Char *UNUSED_P(s), int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
}
/* Regression test for SF bug #1515266: missing check of stopped
parser in doContext() 'for' loop. */
START_TEST(test_stop_parser_between_char_data_calls)
{
/* The sample data must be big enough that there are two calls to
the character data handler from within the inner "for" loop of
the XML_TOK_DATA_CHARS case in doContent(), and the character
handler must stop the parser and clear the character data
handler.
*/
const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_FALSE;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
xml_failure(parser);
}
END_TEST
/* Regression test for SF bug #1515266: missing check of stopped
parser in doContext() 'for' loop. */
START_TEST(test_suspend_parser_between_char_data_calls)
{
/* The sample data must be big enough that there are two calls to
the character data handler from within the inner "for" loop of
the XML_TOK_DATA_CHARS case in doContent(), and the character
handler must stop the parser and clear the character data
handler.
*/
const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
resumable = XML_TRUE;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
xml_failure(parser);
if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
xml_failure(parser);
/* Try parsing directly */
if (XML_Parse(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
fail("Attempt to continue parse while suspended not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED)
fail("Suspended parse not faulted with correct error");
}
END_TEST
static XML_Bool abortable = XML_FALSE;
static void
parser_stop_character_handler(void *UNUSED_P(userData),
const XML_Char *UNUSED_P(s),
int UNUSED_P(len))
{
XML_StopParser(parser, resumable);
XML_SetCharacterDataHandler(parser, NULL);
if (!resumable) {
/* Check that aborting an aborted parser is faulted */
if (XML_StopParser(parser, XML_FALSE) != XML_STATUS_ERROR)
fail("Aborting aborted parser not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_FINISHED)
xml_failure(parser);
} else if (abortable) {
/* Check that aborting a suspended parser works */
if (XML_StopParser(parser, XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
} else {
/* Check that suspending a suspended parser works */
if (XML_StopParser(parser, XML_TRUE) != XML_STATUS_ERROR)
fail("Suspending suspended parser not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_SUSPENDED)
xml_failure(parser);
}
}
/* Test repeated calls to XML_StopParser are handled correctly */
START_TEST(test_repeated_stop_parser_between_char_data_calls)
{
const char *text = long_character_data_text;
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_FALSE;
abortable = XML_FALSE;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) != XML_STATUS_ERROR)
fail("Failed to double-stop parser");
XML_ParserReset(parser, NULL);
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_TRUE;
abortable = XML_FALSE;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) != XML_STATUS_SUSPENDED)
fail("Failed to double-suspend parser");
XML_ParserReset(parser, NULL);
XML_SetCharacterDataHandler(parser, parser_stop_character_handler);
resumable = XML_TRUE;
abortable = XML_TRUE;
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) != XML_STATUS_ERROR)
fail("Failed to suspend-abort parser");
}
END_TEST
START_TEST(test_good_cdata_ascii)
{
const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
const char *expected = "<greeting>Hello, world!</greeting>";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
/* Add start and end handlers for coverage */
XML_SetStartCdataSectionHandler(parser, dummy_start_cdata_handler);
XML_SetEndCdataSectionHandler(parser, dummy_end_cdata_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
/* Try again, this time with a default handler */
XML_ParserReset(parser, NULL);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
XML_SetDefaultHandler(parser, dummy_default_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
START_TEST(test_good_cdata_utf16)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[hello]]></a>
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
const char *expected = "hello";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
START_TEST(test_good_cdata_utf16_le)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[hello]]></a>
*/
const char text[] =
"<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
const char *expected = "hello";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test UTF16 conversion of a long cdata string */
/* 16 characters: handy macro to reduce visual clutter.
   Expands to the letters A through P, each written as a zero byte
   followed by the ASCII byte (32 bytes in total), for pasting into the
   two-byte-per-character test document below.
*/
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
START_TEST(test_long_cdata_utf16)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[
* ABCDEFGHIJKLMNOP
* ]]></a>
*/
const char text[] =
"\0<\0?\0x\0m\0l\0 "
"\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
"\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
/* 64 characters per line */
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16
A_TO_P_IN_UTF16
"\0]\0]\0>\0<\0/\0a\0>";
const char *expected =
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
"ABCDEFGHIJKLMNOP";
CharData storage;
void *buffer;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
buffer = XML_GetBuffer(parser, sizeof(text) - 1);
if (buffer == NULL)
fail("Could not allocate parse buffer");
memcpy(buffer, text, sizeof(text) - 1);
if (XML_ParseBuffer(parser,
sizeof(text) - 1,
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test handling of multiple unit UTF-16 characters */
START_TEST(test_multichar_cdata_utf16)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[{MINIM}{CROTCHET}]]></a>
*
* where {MINIM} is U+1d15e (a minim or half-note)
* UTF-16: 0xd834 0xdd5e
* UTF-8: 0xf0 0x9d 0x85 0x9e
* and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
* UTF-16: 0xd834 0xdd5e
* UTF-8: 0xf0 0x9d 0x85 0x9e
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
"\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
"\0]\0]\0>\0<\0/\0a\0>";
const char *expected = "\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f";
CharData storage;
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
XML_SetCharacterDataHandler(parser, accumulate_characters);
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckXMLChars(&storage, expected);
}
END_TEST
/* Test that an element name with a UTF-16 surrogate pair is rejected */
START_TEST(test_utf16_bad_surrogate_pair)
{
/* Test data is:
* <?xml version='1.0' encoding='utf-16'?>
* <a><![CDATA[{BADLINB}]]></a>
*
* where {BADLINB} is U+10000 (the first Linear B character)
* with the UTF-16 surrogate pair in the wrong order, i.e.
* 0xdc00 0xd800
*/
const char text[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
"\xdc\x00\xd8\x00"
"\0]\0]\0>\0<\0/\0a\0>";
if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1,
XML_TRUE) != XML_STATUS_ERROR)
fail("Reversed UTF-16 surrogate pair not faulted");
if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
xml_failure(parser);
}
END_TEST
START_TEST(test_bad_cdata)
{
struct CaseData {
const char *text;
enum XML_Error expectedError;
};
struct CaseData cases[] = {
{"<a><", XML_ERROR_UNCLOSED_TOKEN},
{"<a><!", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
{"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
{"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
{"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
{"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
{"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
{"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
{"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
{"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
{"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
{"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
{"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
{"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
{"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
};
size_t i = 0;
for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
const enum XML_Error actualError = XML_GetErrorCode(parser);
assert(actualStatus == XML_STATUS_ERROR);
if (actualError != cases[i].expectedError) {
char message[100];
sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
fail(message);
}
XML_ParserReset(parser, NULL);
}
}
END_TEST
/* Test failures in UTF-16 CDATA */
START_TEST(test_bad_cdata_utf16)
{
struct CaseData {
size_t text_bytes;
const char *text;
enum XML_Error expected_error;
};
const char prolog[] =
"\0<\0?\0x\0m\0l\0"
" \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
" \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
"\0?\0>\0\n"
"\0<\0a\0>";
struct CaseData cases[] = {
{1, "\0", XML_ERROR_UNCLOSED_TOKEN},
{2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
{3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
{4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
{5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
{6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
{7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
{8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
{9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
{10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
{11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
{12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
{13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
{14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
{15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
{16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
{17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
{18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z",
XML_ERROR_UNCLOSED_CDATA_SECTION},
/* Now add a four-byte UTF-16 character */
{21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
XML_ERROR_UNCLOSED_CDATA_SECTION},
{22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34",
XML_ERROR_PARTIAL_CHAR},
{23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
XML_ERROR_PARTIAL_CHAR},
{24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
XML_ERROR_UNCLOSED_CDATA_SECTION}
};
size_t i;
for (i = 0; i < sizeof(cases)/sizeof(struct CaseData); i++) {
enum XML_Status actual_status;
enum XML_Error actual_error;
if (_XML_Parse_SINGLE_BYTES(parser, prolog, sizeof(prolog)-1,
XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
actual_status = _XML_Parse_SINGLE_BYTES(parser,
cases[i].text,
cases[i].text_bytes,
XML_TRUE);
assert(actual_status == XML_STATUS_ERROR);
actual_error = XML_GetErrorCode(parser);
if (actual_error != cases[i].expected_error) {
char message[1024];
sprintf(message,
"Expected error %d (%s), got %d (%s) for case %lu\n",
cases[i].expected_error,
XML_ErrorString(cases[i].expected_error),
actual_error,
XML_ErrorString(actual_error),
(long unsigned)(i+1));
fail(message);
}
XML_ParserReset(parser, NULL);
}
}
END_TEST
/* A document whose only content is one CDATA section of 1200 digit
 * characters (20 literal lines of 60 each) inside an <s> element.
 * Shared by test_stop_parser_between_cdata_calls and
 * test_suspend_parser_between_cdata_calls.
 * NOTE(review): the length is presumably chosen so that the character
 * data is reported through more than one handler call -- confirm
 * against the parser's buffering before shortening it.
 */
static const char *long_cdata_text =
"<s><![CDATA["
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"012345678901234567890123456789012345678901234567890123456789"
"]]></s>";
/* Test stopping (aborting) the parser from inside the character data
 * handler while it is working through a long CDATA section.
 */
START_TEST(test_stop_parser_between_cdata_calls)
{
    /* Non-resumable: the handler is to abort rather than suspend */
    resumable = XML_FALSE;
    XML_SetCharacterDataHandler(parser,
                                clearing_aborting_character_handler);

    expect_failure(long_cdata_text, XML_ERROR_ABORTED,
                   "Parse not aborted in CDATA handler");
}
END_TEST
/* Test suspending the parser from inside the character data handler
 * while it is working through a long CDATA section.
 */
START_TEST(test_suspend_parser_between_cdata_calls)
{
    enum XML_Status result;

    /* Resumable: the handler is to suspend rather than abort */
    resumable = XML_TRUE;
    XML_SetCharacterDataHandler(parser,
                                clearing_aborting_character_handler);

    result = _XML_Parse_SINGLE_BYTES(parser, long_cdata_text,
                                     strlen(long_cdata_text), XML_TRUE);

    /* An outright parse error gets the detailed parser diagnostics */
    if (result == XML_STATUS_ERROR) {
        xml_failure(parser);
    }
    /* Anything other than a suspension is a failure */
    if (result != XML_STATUS_SUSPENDED) {
        fail("Parse not suspended in CDATA handler");
    }
    /* Suspending must not leave an error code behind */
    if (XML_GetErrorCode(parser) != XML_ERROR_NONE) {
        xml_failure(parser);
    }
}
END_TEST
/* Test the parser's memory allocation functions: XML_MemMalloc(),
 * XML_MemRealloc() and XML_MemFree().
 */
START_TEST(test_memory_allocation)
{
    char *block = (char *)XML_MemMalloc(parser, 256);
    char *grown;

    if (block != NULL) {
        /* Try writing to memory; some OSes try to cheat! */
        memcpy(block, "TEST", 5);
        if (strcmp(block, "TEST") == 0) {
            grown = (char *)XML_MemRealloc(parser, block, 512);
            if (grown != NULL) {
                /* Write again, just to be sure; the earlier contents
                 * should have survived the reallocation.
                 */
                block = grown;
                block[0] = 'V';
                if (strcmp(block, "VEST") != 0) {
                    fail("Reallocated memory not writable");
                }
            } else {
                /* block still refers to the original allocation */
                fail("Reallocation failed");
            }
        } else {
            fail("Memory not writable");
        }
        XML_MemFree(parser, block);
    } else {
        fail("Allocation failed");
    }
}
END_TEST
/* Default handler that records each invocation by appending "D" to the
 * CharData passed as user data.  The text itself is ignored.
 */
static void XMLCALL
record_default_handler(void *userData,
                       const XML_Char *UNUSED_P(s),
                       int UNUSED_P(len))
{
    CharData *log = (CharData *)userData;

    CharData_AppendString(log, "D");
}
/* Character data handler that records each invocation by appending "C"
 * to the CharData passed as user data, then calls XML_DefaultCurrent()
 * on the global parser.  The text itself is ignored.
 */
static void XMLCALL
record_cdata_handler(void *userData,
                     const XML_Char *UNUSED_P(s),
                     int UNUSED_P(len))
{
    CharData *log = (CharData *)userData;

    CharData_AppendString(log, "C");
    XML_DefaultCurrent(parser);
}
/* Character data handler that records each invocation by appending "c"
 * to the CharData passed as user data.  Unlike record_cdata_handler()
 * it does not call XML_DefaultCurrent().  The text itself is ignored.
 */
static void XMLCALL
record_cdata_nodefault_handler(void *userData,
                               const XML_Char *UNUSED_P(s),
                               int UNUSED_P(len))
{
    CharData *log = (CharData *)userData;

    CharData_AppendString(log, "c");
}
/* Skipped entity handler that records each invocation in the CharData
 * passed as user data: "E" when is_parameter_entity is non-zero, "e"
 * otherwise.  The entity name is ignored.
 */
static void XMLCALL
record_skip_handler(void *userData,
                    const XML_Char *UNUSED_P(entityName),
                    int is_parameter_entity)
{
    CharData *log = (CharData *)userData;

    if (is_parameter_entity) {
        CharData_AppendString(log, "E");
    } else {
        CharData_AppendString(log, "e");
    }
}
/* Test XML_DefaultCurrent() passes handling on correctly */
START_TEST(test_default_current)
{
const char *text = "<doc>hell]</doc>";
const char *entity_text =
"<!DOCTYPE doc [\n"
"<!ENTITY entity '&#37;'>\n"
"]>\n"
"<doc>&entity;</doc>";
CharData storage;
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, "DCDCDCDCDCDD");
/* Again, without the defaulting */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_nodefault_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, "DcccccD");
/* Now with an internal entity to complicate matters */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
/* The default handler suppresses the entity */
CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDDD");
/* Again, with a skip handler */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandler(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
XML_SetSkippedEntityHandler(parser, record_skip_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
/* The default handler suppresses the entity */
CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDeD");
/* This time, allow the entity through */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandlerExpand(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDCDD");
/* Finally, without passing the cdata to the default handler */
XML_ParserReset(parser, NULL);
XML_SetDefaultHandlerExpand(parser, record_default_handler);
XML_SetCharacterDataHandler(parser, record_cdata_nodefault_handler);
CharData_Init(&storage);
XML_SetUserData(parser, &storage);
if (_XML_Parse_SINGLE_BYTES(parser, entity_text, strlen(entity_text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
CharData_CheckString(&storage, "DDDDDDDDDDDDDDDDDcD");
}
END_TEST
/* Test DTD element parsing code paths */
START_TEST(test_dtd_elements)
{
const char *text =
"<!DOCTYPE doc [\n"
"<!ELEMENT doc (chapter)>\n"
"<!ELEMENT chapter (#PCDATA)>\n"
"]>\n"
"<doc><chapter>Wombats are go</chapter></doc>";
XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Test foreign DTD handling */
START_TEST(test_set_foreign_dtd)
{
const char *text1 =
"<?xml version='1.0' encoding='us-ascii'?>\n";
const char *text2 =
"<doc>&entity;</doc>";
ExtTest test_data = {
"<!ELEMENT doc (#PCDATA)*>",
NULL,
NULL
};
/* Check hash salt is passed through too */
XML_SetHashSalt(parser, 0x12345678);
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetUserData(parser, &test_data);
XML_SetExternalEntityRefHandler(parser, external_entity_loader);
/* Add a default handler to exercise more code paths */
XML_SetDefaultHandler(parser, dummy_default_handler);
if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE)
fail("Could not set foreign DTD");
if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1),
XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
/* Ensure that trying to set the DTD after parsing has started
* is faulted, even if it's the same setting.
*/
if (XML_UseForeignDTD(parser, XML_TRUE) !=
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
fail("Failed to reject late foreign DTD setting");
/* Ditto for the hash salt */
if (XML_SetHashSalt(parser, 0x23456789))
fail("Failed to reject late hash salt change");
/* Now finish the parse */
if (_XML_Parse_SINGLE_BYTES(parser, text2, strlen(text2),
XML_TRUE) == XML_STATUS_ERROR)
xml_failure(parser);
}
END_TEST
/* Test foreign DTD handling when the NotStandalone handler rejects the
 * document: the parse must fail with XML_ERROR_NOT_STANDALONE.
 */
START_TEST(test_foreign_dtd_not_standalone)
{
    const char *text =
        "<?xml version='1.0' encoding='us-ascii'?>\n"
        "<doc>&entity;</doc>";
    ExtTest test_data = {
        "<!ELEMENT doc (#PCDATA)*>",
        NULL,
        NULL
    };

    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
    XML_SetUserData(parser, &test_data);
    XML_SetNotStandaloneHandler(parser, reject_not_standalone_handler);
    if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE) {
        fail("Could not set foreign DTD");
    }

    expect_failure(text, XML_ERROR_NOT_STANDALONE,
                   "NotStandalonehandler failed to reject");
}
END_TEST
/* Test invalid character in a foreign DTD is faulted */
START_TEST(test_invalid_foreign_dtd)
{
const char *text =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<doc>&entity;</doc>";
ExtFaults test_data = {
"$",
"Dollar not faulted",
NULL,
XML_ERROR_INVALID_TOKEN
};
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetUserData(parser, &test_data);
XML_SetExternalEntityRefHandler(parser, external_entity_faulter);
XML_UseForeignDTD(parser, XML_TRUE);
expect_failure(text,
XML_ERROR_EXTERNAL_ENTITY_HANDLING,
"Bad DTD should not have been accepted");
}
END_TEST
/* Test foreign DTD use with a doctype */
START_TEST(test_foreign_dtd_with_doctype)
{
const char *text1 =
"<?xml version='1.0' encoding='us-ascii'?>\n"
"<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
const char *text2 =
"<doc>&entity;</doc>";
ExtTest test_data = {
"<!ELEMENT doc (#PCDATA)*>",
NULL,
NULL
};
/* Check hash salt is passed through too */
XML_SetHashSalt(parser, 0x12345678);
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
XML_SetUserData(parser, &test_data);
XML_SetExternalEntityRefHandler(parser, external_entity_loader);
/* Add a default handler to exercise more code paths */
XML_SetDefaultHandler(parser, dummy_default_handler);
if (XML_UseForeignDTD(parser, XML_TRUE) != XML_ERROR_NONE)
fail("Could not set foreign DTD");
if (_XML_Parse_SINGLE_BYTES(parser, text1, strlen(text1),
XML_FALSE) == XML_STATUS_ERROR)
xml_failure(parser);
/* Ensure that trying to set the DTD after parsing has started
* is faulted, even if it's the same setting.
*/
if (XML_UseForeignDTD(parser, XML_TRUE) !=
XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
fail("Failed to reject late foreign DTD setting");
/* Ditto for the hash salt */
if (XML_SetHashSalt(parser, 0x23456789))
fail("Failed to reject late hash salt change");
/* Now finish the parse */
if (_X