blob: 6b0eb421381e327eaf756ce2b9ffae85ae5fd77e [file] [log] [blame]
/*
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
* implemented on top of the SAX interfaces
*
* References:
* The XML specification:
* http://www.w3.org/TR/REC-xml
* Original 1.0 version:
* http://www.w3.org/TR/1998/REC-xml-19980210
* XML second edition working draft
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
*
* Okay this is a big file, the parser core is around 7000 lines, then it
* is followed by the progressive parser top routines, then the various
* high level APIs to call the parser and a few miscellaneous functions.
* A number of helper functions and deprecated ones have been moved to
* parserInternals.c to reduce this file size.
* As much as possible the functions are associated with their relative
* production in the XML specification. A few productions defining the
* different ranges of character are actually implanted either in
* parserInternals.h or parserInternals.c
* The DOM tree build is realized from the default SAX callbacks in
* the module SAX.c.
* The routines doing the validation checks are in valid.c and called either
* from the SAX callbacks or as standalone functions using a preparsed
* document.
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
/* To avoid EBCDIC trouble when parsing on zOS */
#if defined(__MVS__)
#pragma convert("ISO8859-1")
#endif
#define IN_LIBXML
#include "libxml.h"
#if defined(_WIN32)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdarg.h>
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#include <libxml/SAX2.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#include "private/buf.h"
#include "private/dict.h"
#include "private/entities.h"
#include "private/error.h"
#include "private/html.h"
#include "private/io.h"
#include "private/parser.h"
#define NS_INDEX_EMPTY INT_MAX
#define NS_INDEX_XML (INT_MAX - 1)
#define URI_HASH_EMPTY 0xD943A04E
#define URI_HASH_XML 0xF0451F02
struct _xmlStartTag {
const xmlChar *prefix;
const xmlChar *URI;
int line;
int nsNr;
};
typedef struct {
void *saxData;
unsigned prefixHashValue;
unsigned uriHashValue;
unsigned elementId;
int oldIndex;
} xmlParserNsExtra;
typedef struct {
unsigned hashValue;
int index;
} xmlParserNsBucket;
struct _xmlParserNsData {
xmlParserNsExtra *extra;
unsigned hashSize;
unsigned hashElems;
xmlParserNsBucket *hash;
unsigned elementId;
int defaultNsIndex;
};
struct _xmlAttrHashBucket {
int index;
};
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
xmlParserCtxtPtr pctx);
static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);
static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
/************************************************************************
* *
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
* *
************************************************************************/
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000
/*
* Constants for protection against abusive entity expansion
* ("billion laughs").
*/
/*
* A certain amount of entity expansion which is always allowed.
*/
#define XML_PARSER_ALLOWED_EXPANSION 1000000
/*
* Fixed cost for each entity reference. This crudely models processing time
* as well to protect, for example, against exponential expansion of empty
* or very short entities.
*/
#define XML_ENT_FIXED_COST 20
/**
* xmlParserMaxDepth:
*
* arbitrary depth limit for the XML documents that we allow to
* process. This is not a limitation of the parser but a safety
* boundary feature. It can be disabled with the XML_PARSE_HUGE
* parser option.
*/
unsigned int xmlParserMaxDepth = 256;
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
/**
* XML_PARSER_CHUNK_SIZE
*
* When calling GROW that's the minimal amount of data
* the parser expected to have received. It is not a hard
* limit but an optimization when reading strings like Names
* It is not strictly needed as long as inputs available characters
* are followed by 0, which should be provided by the I/O level
*/
#define XML_PARSER_CHUNK_SIZE 100
/**
* xmlParserVersion:
*
* Constant string describing the internal version of the library
*/
const char *const
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
/*
* List of XML prefixed PI allowed by W3C specs
*/
static const char* const xmlW3CPIs[] = {
"xml-stylesheet",
"xml-model",
NULL
};
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
const xmlChar **str);
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
xmlSAXHandlerPtr sax,
void *user_data, int depth, const xmlChar *URL,
const xmlChar *ID, xmlNodePtr *list);
static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
const xmlChar *string, void *user_data, xmlNodePtr *lst);
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
/************************************************************************
* *
* Some factorized error routines *
* *
************************************************************************/
/**
* xmlErrAttributeDup:
* @ctxt: an XML parser context
* @prefix: the attribute prefix
* @localname: the attribute localname
*
* Handle a redefinition of attribute error
*/
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
const xmlChar * localname)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
if (prefix == NULL)
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) localname, NULL, NULL, 0, 0,
"Attribute %s redefined\n", localname);
else
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) prefix, (const char *) localname,
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
localname);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsg:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlWarningMsg:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: extra data
* @str2: extra data
*
* Handle a warning.
*/
void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
(ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
if (ctxt != NULL) {
__xmlRaiseError(schannel,
(ctxt->sax) ? ctxt->sax->warning : NULL,
ctxt->userData,
ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_WARNING, NULL, 0,
(const char *) str1, (const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
} else {
__xmlRaiseError(schannel, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_WARNING, NULL, 0,
(const char *) str1, (const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
}
}
/**
* xmlValidityError:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: extra data
*
* Handle a validity error.
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL) {
ctxt->errNo = error;
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
}
if (ctxt != NULL) {
__xmlRaiseError(schannel,
ctxt->vctxt.error, ctxt->vctxt.userData,
ctxt, NULL, XML_FROM_DTD, error,
XML_ERR_ERROR, NULL, 0, (const char *) str1,
(const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
ctxt->valid = 0;
} else {
__xmlRaiseError(schannel, NULL, NULL,
ctxt, NULL, XML_FROM_DTD, error,
XML_ERR_ERROR, NULL, 0, (const char *) str1,
(const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
}
}
/**
* xmlFatalErrMsgInt:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: an integer value
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, int val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsgStrIntStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: an string info
* @val: an integer value
* @str2: an string info
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, int val,
const xmlChar *str2)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, (const char *) str1, (const char *) str2,
NULL, val, 0, msg, str1, val, str2);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsgStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: a string value
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
val);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlErrMsgStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: a string value
*
* Handle a non fatal parser error
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
XML_FROM_PARSER, error, XML_ERR_ERROR,
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
val);
}
/**
* xmlNsErr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the message
* @info1: extra information string
* @info2: extra information string
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg,
const xmlChar * info1, const xmlChar * info2,
const xmlChar * info3)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
XML_ERR_ERROR, NULL, 0, (const char *) info1,
(const char *) info2, (const char *) info3, 0, 0, msg,
info1, info2, info3);
if (ctxt != NULL)
ctxt->nsWellFormed = 0;
}
/**
* xmlNsWarn
* @ctxt: an XML parser context
* @error: the error number
* @msg: the message
* @info1: extra information string
* @info2: extra information string
*
* Handle a namespace warning error
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg,
const xmlChar * info1, const xmlChar * info2,
const xmlChar * info3)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
XML_ERR_WARNING, NULL, 0, (const char *) info1,
(const char *) info2, (const char *) info3, 0, 0, msg,
info1, info2, info3);
}
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
if (val > ULONG_MAX - *dst)
*dst = ULONG_MAX;
else
*dst += val;
}
static void
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
if (val > ULONG_MAX - *dst)
*dst = ULONG_MAX;
else
*dst += val;
}
/**
* xmlParserEntityCheck:
* @ctxt: parser context
* @extra: sum of unexpanded entity sizes
*
* Check for non-linear entity expansion behaviour.
*
* In some cases like xmlStringDecodeEntities, this function is called
* for each, possibly nested entity and its unexpanded content length.
*
* In other cases like xmlParseReference, it's only called for each
* top-level entity with its unexpanded content length plus the sum of
* the unexpanded content lengths (plus fixed cost) of all nested
* entities.
*
* Summing the unexpanded lengths also adds the length of the reference.
* This is by design. Taking the length of the entity name into account
* discourages attacks that try to waste CPU time with abusively long
* entity names. See test/recurse/lol6.xml for example. Each call also
* adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
* short entities.
*
* Returns 1 on error, 0 on success.
*/
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
{
unsigned long consumed;
xmlParserInputPtr input = ctxt->input;
xmlEntityPtr entity = input->entity;
/*
* Compute total consumed bytes so far, including input streams of
* external entities.
*/
consumed = input->parentConsumed;
if ((entity == NULL) ||
((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((entity->flags & XML_ENT_PARSED) == 0))) {
xmlSaturatedAdd(&consumed, input->consumed);
xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
}
xmlSaturatedAdd(&consumed, ctxt->sizeentities);
/*
* Add extra cost and some fixed cost.
*/
xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
/*
* It's important to always use saturation arithmetic when tracking
* entity sizes to make the size checks reliable. If "sizeentcopy"
* overflows, we have to abort.
*/
if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
((ctxt->sizeentcopy >= ULONG_MAX) ||
(ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
"Maximum entity amplification factor exceeded, see "
"xmlCtxtSetMaxAmplification.\n");
xmlHaltParser(ctxt);
return(1);
}
return(0);
}
/************************************************************************
* *
* Library wide options *
* *
************************************************************************/
/**
* xmlHasFeature:
* @feature: the feature to be examined
*
* Examines if the library has been compiled with a given feature.
*
* Returns a non-zero value if the feature exist, otherwise zero.
* Returns zero (0) if the feature does not exist or an unknown
* unknown feature is requested, non-zero otherwise.
*/
int
xmlHasFeature(xmlFeature feature)
{
switch (feature) {
case XML_WITH_THREAD:
#ifdef LIBXML_THREAD_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_TREE:
#ifdef LIBXML_TREE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_OUTPUT:
#ifdef LIBXML_OUTPUT_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_PUSH:
#ifdef LIBXML_PUSH_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_READER:
#ifdef LIBXML_READER_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_PATTERN:
#ifdef LIBXML_PATTERN_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_WRITER:
#ifdef LIBXML_WRITER_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SAX1:
#ifdef LIBXML_SAX1_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_FTP:
#ifdef LIBXML_FTP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_HTTP:
#ifdef LIBXML_HTTP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_VALID:
#ifdef LIBXML_VALID_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_HTML:
#ifdef LIBXML_HTML_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_LEGACY:
#ifdef LIBXML_LEGACY_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_C14N:
#ifdef LIBXML_C14N_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_CATALOG:
#ifdef LIBXML_CATALOG_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XPATH:
#ifdef LIBXML_XPATH_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XPTR:
#ifdef LIBXML_XPTR_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XINCLUDE:
#ifdef LIBXML_XINCLUDE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ICONV:
#ifdef LIBXML_ICONV_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ISO8859X:
#ifdef LIBXML_ISO8859X_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_UNICODE:
#ifdef LIBXML_UNICODE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_REGEXP:
#ifdef LIBXML_REGEXP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_AUTOMATA:
#ifdef LIBXML_AUTOMATA_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_EXPR:
#ifdef LIBXML_EXPR_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SCHEMAS:
#ifdef LIBXML_SCHEMAS_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SCHEMATRON:
#ifdef LIBXML_SCHEMATRON_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_MODULES:
#ifdef LIBXML_MODULES_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG:
#ifdef LIBXML_DEBUG_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG_MEM:
#ifdef DEBUG_MEMORY_LOCATION
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG_RUN:
return(0);
case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_LZMA:
#ifdef LIBXML_LZMA_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
return(1);
#else
return(0);
#endif
default:
break;
}
return(0);
}
/************************************************************************
* *
* SAX2 defaulted attributes handling *
* *
************************************************************************/
/**
* xmlDetectSAX2:
* @ctxt: an XML parser context
*
* Do the SAX2 detection and specific initialization
*/
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
xmlSAXHandlerPtr sax;
/* Avoid unused variable warning if features are disabled. */
(void) sax;
if (ctxt == NULL) return;
sax = ctxt->sax;
#ifdef LIBXML_SAX1_ENABLED
/*
* Only enable SAX2 if there SAX2 element handlers, except when there
* are no element handlers at all.
*/
if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
((sax->startElementNs != NULL) ||
(sax->endElementNs != NULL) ||
((sax->startElement == NULL) && (sax->endElement == NULL))))
ctxt->sax2 = 1;
#else
ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
(ctxt->str_xml_ns == NULL)) {
xmlErrMemory(ctxt, NULL);
}
}
typedef struct {
xmlHashedString prefix;
xmlHashedString name;
xmlHashedString value;
const xmlChar *valueEnd;
int external;
int expandedSize;
} xmlDefAttr;
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
int nbAttrs; /* number of defaulted attributes on that element */
int maxAttrs; /* the size of the array */
#if __STDC_VERSION__ >= 199901L
/* Using a C99 flexible array member avoids UBSan errors. */
xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
#else
xmlDefAttr attrs[1];
#endif
};
/**
* xmlAttrNormalizeSpace:
* @src: the source string
* @dst: the target string
*
* Normalize the space in non CDATA attribute values:
* If the attribute type is not CDATA, then the XML processor MUST further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
* (#x20) characters by a single space (#x20) character.
* Note that the size of dst need to be at least src, and if one doesn't need
* to preserve dst (and it doesn't come from a dictionary or read-only) then
* passing src as dst is just fine.
*
* Returns a pointer to the normalized value (dst) or NULL if no conversion
* is needed.
*/
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
if ((src == NULL) || (dst == NULL))
return(NULL);
while (*src == 0x20) src++;
while (*src != 0) {
if (*src == 0x20) {
while (*src == 0x20) src++;
if (*src != 0)
*dst++ = 0x20;
} else {
*dst++ = *src++;
}
}
*dst = 0;
if (dst == src)
return(NULL);
return(dst);
}
/**
* xmlAttrNormalizeSpace2:
* @src: the source string
*
* Normalize the space in non CDATA attribute values, a slightly more complex
* front end to avoid allocation problems when running on attribute values
* coming from the input.
*
* Returns a pointer to the normalized value (dst) or NULL if no conversion
* is needed.
*/
static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
{
int i;
int remove_head = 0;
int need_realloc = 0;
const xmlChar *cur;
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
return(NULL);
i = *len;
if (i <= 0)
return(NULL);
cur = src;
while (*cur == 0x20) {
cur++;
remove_head++;
}
while (*cur != 0) {
if (*cur == 0x20) {
cur++;
if ((*cur == 0x20) || (*cur == 0)) {
need_realloc = 1;
break;
}
} else
cur++;
}
if (need_realloc) {
xmlChar *ret;
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
if (ret == NULL) {
xmlErrMemory(ctxt, NULL);
return(NULL);
}
xmlAttrNormalizeSpace(ret, ret);
*len = strlen((const char *)ret);
return(ret);
} else if (remove_head) {
*len -= remove_head;
memmove(src, src + remove_head, 1 + *len);
return(src);
}
return(NULL);
}
/**
* xmlAddDefAttrs:
* @ctxt: an XML parser context
* @fullname: the element fullname
* @fullattr: the attribute fullname
* @value: the attribute value
*
* Add a defaulted attribute for an element
*/
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
const xmlChar *fullname,
const xmlChar *fullattr,
const xmlChar *value) {
xmlDefAttrsPtr defaults;
xmlDefAttr *attr;
int len, expandedSize;
xmlHashedString name;
xmlHashedString prefix;
xmlHashedString hvalue;
const xmlChar *localname;
/*
* Allows to detect attribute redefinitions
*/
if (ctxt->attsSpecial != NULL) {
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
return;
}
if (ctxt->attsDefault == NULL) {
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
if (ctxt->attsDefault == NULL)
goto mem_error;
}
/*
* split the element name into prefix:localname , the string found
* are within the DTD and then not associated to namespace names.
*/
localname = xmlSplitQName3(fullname, &len);
if (localname == NULL) {
name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
prefix.name = NULL;
} else {
name = xmlDictLookupHashed(ctxt->dict, localname, -1);
prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
if (prefix.name == NULL)
goto mem_error;
}
if (name.name == NULL)
goto mem_error;
/*
* make sure there is some storage
*/
defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
if ((defaults == NULL) ||
(defaults->nbAttrs >= defaults->maxAttrs)) {
xmlDefAttrsPtr temp;
int newSize;
newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
temp = xmlRealloc(defaults,
sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
if (temp == NULL)
goto mem_error;
if (defaults == NULL)
temp->nbAttrs = 0;
temp->maxAttrs = newSize;
defaults = temp;
if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
defaults, NULL) < 0) {
xmlFree(defaults);
goto mem_error;
}
}
/*
* Split the attribute name into prefix:localname , the string found
* are within the DTD and hen not associated to namespace names.
*/
localname = xmlSplitQName3(fullattr, &len);
if (localname == NULL) {
name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
prefix.name = NULL;
} else {
name = xmlDictLookupHashed(ctxt->dict, localname, -1);
prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
if (prefix.name == NULL)
goto mem_error;
}
if (name.name == NULL)
goto mem_error;
/* intern the string and precompute the end */
len = strlen((const char *) value);
hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
if (hvalue.name == NULL)
goto mem_error;
expandedSize = strlen((const char *) name.name);
if (prefix.name != NULL)
expandedSize += strlen((const char *) prefix.name);
expandedSize += len;
attr = &defaults->attrs[defaults->nbAttrs++];
attr->name = name;
attr->prefix = prefix;
attr->value = hvalue;
attr->valueEnd = hvalue.name + len;
attr->external = ctxt->external;
attr->expandedSize = expandedSize;
return;
mem_error:
xmlErrMemory(ctxt, NULL);
return;
}
/**
* xmlAddSpecialAttr:
* @ctxt: an XML parser context
* @fullname: the element fullname
* @fullattr: the attribute fullname
* @type: the attribute type
*
* Register this attribute type
*/
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
const xmlChar *fullname,
const xmlChar *fullattr,
int type)
{
if (ctxt->attsSpecial == NULL) {
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
if (ctxt->attsSpecial == NULL)
goto mem_error;
}
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
return;
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
(void *) (ptrdiff_t) type);
return;
mem_error:
xmlErrMemory(ctxt, NULL);
return;
}
/**
* xmlCleanSpecialAttrCallback:
*
* Removes CDATA attributes from the special attribute table
*/
static void
xmlCleanSpecialAttrCallback(void *payload, void *data,
const xmlChar *fullname, const xmlChar *fullattr,
const xmlChar *unused ATTRIBUTE_UNUSED) {
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
}
}
/**
* xmlCleanSpecialAttr:
* @ctxt: an XML parser context
*
* Trim the list of attributes defined to remove all those of type
* CDATA as they are not special. This call should be done when finishing
* to parse the DTD and before starting to parse the document root.
*/
static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
{
if (ctxt->attsSpecial == NULL)
return;
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
if (xmlHashSize(ctxt->attsSpecial) == 0) {
xmlHashFree(ctxt->attsSpecial, NULL);
ctxt->attsSpecial = NULL;
}
return;
}
/**
* xmlCheckLanguageID:
* @lang: pointer to the string value
*
* DEPRECATED: Internal function, do not use.
*
* Checks that the value conforms to the LanguageID production:
*
* NOTE: this is somewhat deprecated, those productions were removed from
* the XML Second edition.
*
* [33] LanguageID ::= Langcode ('-' Subcode)*
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
* [38] Subcode ::= ([a-z] | [A-Z])+
*
* The current REC reference the successors of RFC 1766, currently 5646
*
* http://www.rfc-editor.org/rfc/rfc5646.txt
* langtag = language
* ["-" script]
* ["-" region]
* *("-" variant)
* *("-" extension)
* ["-" privateuse]
* language = 2*3ALPHA ; shortest ISO 639 code
* ["-" extlang] ; sometimes followed by
* ; extended language subtags
* / 4ALPHA ; or reserved for future use
* / 5*8ALPHA ; or registered language subtag
*
* extlang = 3ALPHA ; selected ISO 639 codes
* *2("-" 3ALPHA) ; permanently reserved
*
* script = 4ALPHA ; ISO 15924 code
*
* region = 2ALPHA ; ISO 3166-1 code
* / 3DIGIT ; UN M.49 code
*
* variant = 5*8alphanum ; registered variants
* / (DIGIT 3alphanum)
*
* extension = singleton 1*("-" (2*8alphanum))
*
* ; Single alphanumerics
* ; "x" reserved for private use
* singleton = DIGIT ; 0 - 9
* / %x41-57 ; A - W
* / %x59-5A ; Y - Z
* / %x61-77 ; a - w
* / %x79-7A ; y - z
*
* it sounds right to still allow Irregular i-xxx IANA and user codes too
* The parser below doesn't try to cope with extension or privateuse
* that could be added but that's not interoperable anyway
*
* Returns 1 if correct 0 otherwise
**/
int
xmlCheckLanguageID(const xmlChar * lang)
{
const xmlChar *cur = lang, *nxt;
if (cur == NULL)
return (0);
if (((cur[0] == 'i') && (cur[1] == '-')) ||
((cur[0] == 'I') && (cur[1] == '-')) ||
((cur[0] == 'x') && (cur[1] == '-')) ||
((cur[0] == 'X') && (cur[1] == '-'))) {
/*
* Still allow IANA code and user code which were coming
* from the previous version of the XML-1.0 specification
* it's deprecated but we should not fail
*/
cur += 2;
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
return(cur[0] == 0);
}
nxt = cur;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur >= 4) {
/*
* Reserved
*/
if ((nxt - cur > 8) || (nxt[0] != 0))
return(0);
return(1);
}
if (nxt - cur < 2)
return(0);
/* we got an ISO 639 code */
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have extlang or script or region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur == 4)
goto script;
if (nxt - cur == 2)
goto region;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 3)
return(0);
/* we parsed an extlang */
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have script or region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur == 2)
goto region;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 4)
return(0);
/* we parsed a script */
script:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 2)
return(0);
/* we parsed a region */
region:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can just have a variant */
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if ((nxt - cur < 5) || (nxt - cur > 8))
return(0);
/* we parsed a variant */
variant:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
/* extensions and private use subtags not checked */
return (1);
region_m49:
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
nxt += 3;
goto region;
}
return(0);
}
/************************************************************************
* *
* Parser stacks related functions and macros *
* *
************************************************************************/
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
const xmlChar ** str);
/**
* xmlParserNsCreate:
*
* Create a new namespace database.
*
* Returns the new obejct.
*/
xmlParserNsData *
xmlParserNsCreate(void) {
xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
if (nsdb == NULL)
return(NULL);
memset(nsdb, 0, sizeof(*nsdb));
nsdb->defaultNsIndex = INT_MAX;
return(nsdb);
}
/**
* xmlParserNsFree:
* @nsdb: namespace database
*
* Free a namespace database.
*/
void
xmlParserNsFree(xmlParserNsData *nsdb) {
if (nsdb == NULL)
return;
xmlFree(nsdb->extra);
xmlFree(nsdb->hash);
xmlFree(nsdb);
}
/**
* xmlParserNsReset:
* @nsdb: namespace database
*
* Reset a namespace database.
*/
static void
xmlParserNsReset(xmlParserNsData *nsdb) {
if (nsdb == NULL)
return;
nsdb->hashElems = 0;
nsdb->elementId = 0;
nsdb->defaultNsIndex = INT_MAX;
if (nsdb->hash)
memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
}
/**
* xmlParserStartElement:
* @nsdb: namespace database
*
* Signal that a new element has started.
*
* Returns 0 on success, -1 if the element counter overflowed.
*/
static int
xmlParserNsStartElement(xmlParserNsData *nsdb) {
if (nsdb->elementId == UINT_MAX)
return(-1);
nsdb->elementId++;
return(0);
}
/**
* xmlParserNsLookup:
* @ctxt: parser context
* @prefix: namespace prefix
* @bucketPtr: optional bucket (return value)
*
* Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
* be set to the matching bucket, or the first empty bucket if no match
* was found.
*
* Returns the namespace index on success, INT_MAX if no namespace was
* found.
*/
static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
xmlParserNsBucket **bucketPtr) {
xmlParserNsBucket *bucket;
unsigned index, hashValue;
if (prefix->name == NULL)
return(ctxt->nsdb->defaultNsIndex);
if (ctxt->nsdb->hashSize == 0)
return(INT_MAX);
hashValue = prefix->hashValue;
index = hashValue & (ctxt->nsdb->hashSize - 1);
bucket = &ctxt->nsdb->hash[index];
while (bucket->hashValue) {
if ((bucket->hashValue == hashValue) &&
(bucket->index != INT_MAX)) {
if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
if (bucketPtr != NULL)
*bucketPtr = bucket;
return(bucket->index);
}
}
index++;
bucket++;
if (index == ctxt->nsdb->hashSize) {
index = 0;
bucket = ctxt->nsdb->hash;
}
}
if (bucketPtr != NULL)
*bucketPtr = bucket;
return(INT_MAX);
}
/**
* xmlParserNsLookupUri:
* @ctxt: parser context
* @prefix: namespace prefix
*
* Lookup namespace URI with given prefix.
*
* Returns the namespace URI on success, NULL if no namespace was found.
*/
static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
const xmlChar *ret;
int nsIndex;
if (prefix->name == ctxt->str_xml)
return(ctxt->str_xml_ns);
nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
if (nsIndex == INT_MAX)
return(NULL);
ret = ctxt->nsTab[nsIndex * 2 + 1];
if (ret[0] == 0)
ret = NULL;
return(ret);
}
/**
* xmlParserNsLookupSax:
* @ctxt: parser context
* @prefix: namespace prefix
*
* Lookup extra data for the given prefix. This returns data stored
* with xmlParserNsUdpateSax.
*
* Returns the data on success, NULL if no namespace was found.
*/
void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
xmlHashedString hprefix;
int nsIndex;
if (prefix == ctxt->str_xml)
return(NULL);
hprefix.name = prefix;
if (prefix != NULL)
hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
else
hprefix.hashValue = 0;
nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
if (nsIndex == INT_MAX)
return(NULL);
return(ctxt->nsdb->extra[nsIndex].saxData);
}
/**
* xmlParserNsUpdateSax:
* @ctxt: parser context
* @prefix: namespace prefix
* @saxData: extra data for SAX handler
*
* Sets or updates extra data for the given prefix. This value will be
* returned by xmlParserNsLookupSax as long as the namespace with the
* given prefix is in scope.
*
* Returns the data on success, NULL if no namespace was found.
*/
int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
void *saxData) {
xmlHashedString hprefix;
int nsIndex;
if (prefix == ctxt->str_xml)
return(-1);
hprefix.name = prefix;
if (prefix != NULL)
hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
else
hprefix.hashValue = 0;
nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
if (nsIndex == INT_MAX)
return(-1);
ctxt->nsdb->extra[nsIndex].saxData = saxData;
return(0);
}
/**
* xmlParserNsGrow:
* @ctxt: parser context
*
* Grows the namespace tables.
*
* Returns 0 on success, -1 if a memory allocation failed.
*/
static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
const xmlChar **table;
xmlParserNsExtra *extra;
int newSize;
if (ctxt->nsMax > INT_MAX / 2)
goto error;
newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
if (table == NULL)
goto error;
ctxt->nsTab = table;
extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
if (extra == NULL)
goto error;
ctxt->nsdb->extra = extra;
ctxt->nsMax = newSize;
return(0);
error:
xmlErrMemory(ctxt, NULL);
return(-1);
}
/**
* xmlParserNsPush:
* @ctxt: parser context
* @prefix: prefix with hash value
* @uri: uri with hash value
* @saxData: extra data for SAX handler
* @defAttr: whether the namespace comes from a default attribute
*
* Push a new namespace on the table.
*
* Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
* -1 if a memory allocation failed.
*/
static int
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
const xmlHashedString *uri, void *saxData, int defAttr) {
xmlParserNsBucket *bucket = NULL;
xmlParserNsExtra *extra;
const xmlChar **ns;
unsigned hashValue, nsIndex, oldIndex;
if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
return(0);
if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
xmlErrMemory(ctxt, NULL);
return(-1);
}
/*
* Default namespace and 'xml' namespace
*/
if ((prefix == NULL) || (prefix->name == NULL)) {
oldIndex = ctxt->nsdb->defaultNsIndex;
if (oldIndex != INT_MAX) {
if (defAttr != 0)
return(0);
extra = &ctxt->nsdb->extra[oldIndex];
if (extra->elementId == ctxt->nsdb->elementId) {
xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
return(0);
}
if ((ctxt->options & XML_PARSE_NSCLEAN) &&
(uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
return(0);
}
ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
goto populate_entry;
}
/*
* Hash table lookup
*/
oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
if (oldIndex != INT_MAX) {
extra = &ctxt->nsdb->extra[oldIndex];
if (defAttr != 0)
return(0);
/*
* Check for duplicate definitions on the same element.
*/
if (extra->elementId == ctxt->nsdb->elementId) {
xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
return(0);
}
if ((ctxt->options & XML_PARSE_NSCLEAN) &&
(uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
return(0);
bucket->index = ctxt->nsNr;
goto populate_entry;
}
/*
* Insert new bucket
*/
hashValue = prefix->hashValue;
/*
* Grow hash table, 50% fill factor
*/
if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
xmlParserNsBucket *newHash;
unsigned newSize, i, index;
if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
xmlErrMemory(ctxt, NULL);
return(-1);
}
newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
newHash = xmlMalloc(newSize * sizeof(newHash[0]));
if (newHash == NULL) {
xmlErrMemory(ctxt, NULL);
return(-1);
}
memset(newHash, 0, newSize * sizeof(newHash[0]));
for (i = 0; i < ctxt->nsdb->hashSize; i++) {
unsigned hv = ctxt->nsdb->hash[i].hashValue;
unsigned newIndex;
if (hv == 0)
continue;
newIndex = hv & (newSize - 1);
while (newHash[newIndex].hashValue != 0) {
newIndex++;
if (newIndex == newSize)
newIndex = 0;
}
newHash[newIndex] = ctxt->nsdb->hash[i];
}
xmlFree(ctxt->nsdb->hash);
ctxt->nsdb->hash = newHash;
ctxt->nsdb->hashSize = newSize;
/*
* Relookup
*/
index = hashValue & (newSize - 1);
while (newHash[index].hashValue != 0) {
index++;
if (index == newSize)
index = 0;
}
bucket = &newHash[index];
}
bucket->hashValue = hashValue;
bucket->index = ctxt->nsNr;
ctxt->nsdb->hashElems++;
oldIndex = INT_MAX;
populate_entry:
nsIndex = ctxt->nsNr;
ns = &ctxt->nsTab[nsIndex * 2];
ns[0] = prefix ? prefix->name : NULL;
ns[1] = uri->name;
extra = &ctxt->nsdb->extra[nsIndex];
extra->saxData = saxData;
extra->prefixHashValue = prefix ? prefix->hashValue : 0;
extra->uriHashValue = uri->hashValue;
extra->elementId = ctxt->nsdb->elementId;
extra->oldIndex = oldIndex;
ctxt->nsNr++;
return(1);
}
/**
* xmlParserNsPop:
* @ctxt: an XML parser context
* @nr: the number to pop
*
* Pops the top @nr namespaces and restores the hash table.
*
* Returns the number of namespaces popped.
*/
static int
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
{
int i;
/* assert(nr <= ctxt->nsNr); */
for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
const xmlChar *prefix = ctxt->nsTab[i * 2];
xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
if (prefix == NULL) {
ctxt->nsdb->defaultNsIndex = extra->oldIndex;
} else {
xmlHashedString hprefix;
xmlParserNsBucket *bucket = NULL;
hprefix.name = prefix;
hprefix.hashValue = extra->prefixHashValue;
xmlParserNsLookup(ctxt, &hprefix, &bucket);
/* assert(bucket && bucket->hashValue); */
bucket->index = extra->oldIndex;
}
}
ctxt->nsNr -= nr;
return(nr);
}
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
const xmlChar **atts;
unsigned *attallocs;
int maxatts;
if (nr + 5 > ctxt->maxatts) {
maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
atts = (const xmlChar **) xmlMalloc(
maxatts * sizeof(const xmlChar *));
if (atts == NULL) goto mem_error;
attallocs = xmlRealloc(ctxt->attallocs,
(maxatts / 5) * sizeof(attallocs[0]));
if (attallocs == NULL) {
xmlFree(atts);
goto mem_error;
}
if (ctxt->maxatts > 0)
memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
xmlFree(ctxt->atts);
ctxt->atts = atts;
ctxt->attallocs = attallocs;
ctxt->maxatts = maxatts;
}
return(ctxt->maxatts);
mem_error:
xmlErrMemory(ctxt, NULL);
return(-1);
}
/**
* inputPush:
* @ctxt: an XML parser context
* @value: the parser input
*
* Pushes a new parser input on top of the input stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
if ((ctxt == NULL) || (value == NULL))
return(-1);
if (ctxt->inputNr >= ctxt->inputMax) {
size_t newSize = ctxt->inputMax * 2;
xmlParserInputPtr *tmp;
tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
newSize * sizeof(*tmp));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
return (-1);
}
ctxt->inputTab = tmp;
ctxt->inputMax = newSize;
}
ctxt->inputTab[ctxt->inputNr] = value;
ctxt->input = value;
return (ctxt->inputNr++);
}
/**
* inputPop:
* @ctxt: an XML parser context
*
* Pops the top parser input from the input stack
*
* Returns the input just removed
*/
xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr ret;
if (ctxt == NULL)
return(NULL);
if (ctxt->inputNr <= 0)
return (NULL);
ctxt->inputNr--;
if (ctxt->inputNr > 0)
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
else
ctxt->input = NULL;
ret = ctxt->inputTab[ctxt->inputNr];
ctxt->inputTab[ctxt->inputNr] = NULL;
return (ret);
}
/**
* nodePush:
* @ctxt: an XML parser context
* @value: the element node
*
* DEPRECATED: Internal function, do not use.
*
* Pushes a new element node on top of the node stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
if (ctxt == NULL) return(0);
if (ctxt->nodeNr >= ctxt->nodeMax) {
xmlNodePtr *tmp;
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
ctxt->nodeMax * 2 *
sizeof(ctxt->nodeTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
return (-1);
}
ctxt->nodeTab = tmp;
ctxt->nodeMax *= 2;
}
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
xmlParserMaxDepth);
xmlHaltParser(ctxt);
return(-1);
}
ctxt->nodeTab[ctxt->nodeNr] = value;
ctxt->node = value;
return (ctxt->nodeNr++);
}
/**
* nodePop:
* @ctxt: an XML parser context
*
* DEPRECATED: Internal function, do not use.
*
* Pops the top element node from the node stack
*
* Returns the node just removed
*/
xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)
{
xmlNodePtr ret;
if (ctxt == NULL) return(NULL);
if (ctxt->nodeNr <= 0)
return (NULL);
ctxt->nodeNr--;
if (ctxt->nodeNr > 0)
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
else
ctxt->node = NULL;
ret = ctxt->nodeTab[ctxt->nodeNr];
ctxt->nodeTab[ctxt->nodeNr] = NULL;
return (ret);
}
/**
* nameNsPush:
* @ctxt: an XML parser context
* @value: the element name
* @prefix: the element prefix
* @URI: the element namespace name
* @line: the current line number for error messages
* @nsNr: the number of namespaces pushed on the namespace table
*
* Pushes a new element name/prefix/URL on top of the name stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
static int
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
{
xmlStartTag *tag;
if (ctxt->nameNr >= ctxt->nameMax) {
const xmlChar * *tmp;
xmlStartTag *tmp2;
ctxt->nameMax *= 2;
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
ctxt->nameMax *
sizeof(ctxt->nameTab[0]));
if (tmp == NULL) {
ctxt->nameMax /= 2;
goto mem_error;
}
ctxt->nameTab = tmp;
tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
ctxt->nameMax *
sizeof(ctxt->pushTab[0]));
if (tmp2 == NULL) {
ctxt->nameMax /= 2;
goto mem_error;
}
ctxt->pushTab = tmp2;
} else if (ctxt->pushTab == NULL) {
ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
sizeof(ctxt->pushTab[0]));
if (ctxt->pushTab == NULL)
goto mem_error;
}
ctxt->nameTab[ctxt->nameNr] = value;
ctxt->name = value;
tag = &ctxt->pushTab[ctxt->nameNr];
tag->prefix = prefix;
tag->URI = URI;
tag->line = line;
tag->nsNr = nsNr;
return (ctxt->nameNr++);
mem_error:
xmlErrMemory(ctxt, NULL);
return (-1);
}
#ifdef LIBXML_PUSH_ENABLED
/**
* nameNsPop:
* @ctxt: an XML parser context
*
* Pops the top element/prefix/URI name from the name stack
*
* Returns the name just removed
*/
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
const xmlChar *ret;
if (ctxt->nameNr <= 0)
return (NULL);
ctxt->nameNr--;
if (ctxt->nameNr > 0)
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
else
ctxt->name = NULL;
ret = ctxt->nameTab[ctxt->nameNr];
ctxt->nameTab[ctxt->nameNr] = NULL;
return (ret);
}
#endif /* LIBXML_PUSH_ENABLED */
/**
* namePush:
* @ctxt: an XML parser context
* @value: the element name
*
* DEPRECATED: Internal function, do not use.
*
* Pushes a new element name on top of the name stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
{
if (ctxt == NULL) return (-1);
if (ctxt->nameNr >= ctxt->nameMax) {
const xmlChar * *tmp;
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
ctxt->nameMax * 2 *
sizeof(ctxt->nameTab[0]));
if (tmp == NULL) {
goto mem_error;
}
ctxt->nameTab = tmp;
ctxt->nameMax *= 2;
}
ctxt->nameTab[ctxt->nameNr] = value;
ctxt->name = value;
return (ctxt->nameNr++);
mem_error:
xmlErrMemory(ctxt, NULL);
return (-1);
}
/**
* namePop:
* @ctxt: an XML parser context
*
* DEPRECATED: Internal function, do not use.
*
* Pops the top element name from the name stack
*
* Returns the name just removed
*/
const xmlChar *
namePop(xmlParserCtxtPtr ctxt)
{
const xmlChar *ret;
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
return (NULL);
ctxt->nameNr--;
if (ctxt->nameNr > 0)
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
else
ctxt->name = NULL;
ret = ctxt->nameTab[ctxt->nameNr];
ctxt->nameTab[ctxt->nameNr] = NULL;
return (ret);
}
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
if (ctxt->spaceNr >= ctxt->spaceMax) {
int *tmp;
ctxt->spaceMax *= 2;
tmp = (int *) xmlRealloc(ctxt->spaceTab,
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->spaceMax /=2;
return(-1);
}
ctxt->spaceTab = tmp;
}
ctxt->spaceTab[ctxt->spaceNr] = val;
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
return(ctxt->spaceNr++);
}
static int spacePop(xmlParserCtxtPtr ctxt) {
int ret;
if (ctxt->spaceNr <= 0) return(0);
ctxt->spaceNr--;
if (ctxt->spaceNr > 0)
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
else
ctxt->space = &ctxt->spaceTab[0];
ret = ctxt->spaceTab[ctxt->spaceNr];
ctxt->spaceTab[ctxt->spaceNr] = -1;
return(ret);
}
/*
* Macros for accessing the content. Those should be used only by the parser,
* and not exported.
*
* Dirty macros, i.e. one often need to make assumption on the context to
* use them
*
* CUR_PTR return the current pointer to the xmlChar to be parsed.
* To be used with extreme caution since operations consuming
* characters may move the input buffer to a different location !
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
* This should be used internally by the parser
* only to compare to ASCII values otherwise it would break when
* running with UTF-8 encoding.
* RAW same as CUR but in the input buffer, bypass any token
* extraction that may have been done
* NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings without newlines within the parser.
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
* NEXT Skip to the next character, this does the proper decoding
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
* NEXTL(l) Skip the current unicode character of l xmlChars long.
* CUR_CHAR(l) returns the current unicode character (int), set l
* to the number of xmlChars used for the encoding [0-5].
* CUR_SCHAR same but operate on a string instead of the context
* COPY_BUF copy the current unicode char to the target buffer, increment
* the index
* GROW, SHRINK handling of input buffers
*/
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
#define CMP4( s, c1, c2, c3, c4 ) \
( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
((unsigned char *) s)[ 9 ] == c10 )
#define SKIP(val) do { \
ctxt->input->cur += (val),ctxt->input->col+=(val); \
if (*ctxt->input->cur == 0) \
xmlParserGrow(ctxt); \
} while (0)
#define SKIPL(val) do { \
int skipl; \
for(skipl=0; skipl<val; skipl++) { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == 0) \
xmlParserGrow(ctxt); \
} while (0)
/* Don't shrink push parser buffer. */
#define SHRINK \
if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
xmlParserShrink(ctxt);
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
xmlParserGrow(ctxt);
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
#define NEXT xmlNextChar(ctxt)
#define NEXT1 { \
ctxt->input->col++; \
ctxt->input->cur++; \
if (*ctxt->input->cur == 0) \
xmlParserGrow(ctxt); \
}
#define NEXTL(l) do { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->input->cur += l; \
} while (0)
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
#define COPY_BUF(b, i, v) \
if (v < 0x80) b[i++] = v; \
else i += xmlCopyCharMultiByte(&b[i],v)
/**
* xmlSkipBlankChars:
* @ctxt: the XML parser context
*
* DEPRECATED: Internal function, do not use.
*
* skip all blanks character found at that point in the input streams.
* It pops up finished entities in the process if allowable at that point.
*
* Returns the number of space chars skipped
*/
int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0;
/*
* It's Okay to use CUR/NEXT here since all the blanks are on
* the ASCII range.
*/
if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
(ctxt->instate == XML_PARSER_START)) {
const xmlChar *cur;
/*
* if we are in the document content, go really fast
*/
cur = ctxt->input->cur;
while (IS_BLANK_CH(*cur)) {
if (*cur == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
} else {
ctxt->input->col++;
}
cur++;
if (res < INT_MAX)
res++;
if (*cur == 0) {
ctxt->input->cur = cur;
xmlParserGrow(ctxt);
cur = ctxt->input->cur;
}
}
ctxt->input->cur = cur;
} else {
int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
while (ctxt->instate != XML_PARSER_EOF) {
if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
NEXT;
} else if (CUR == '%') {
/*
* Need to handle support of entities branching here
*/
if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
break;
xmlParsePEReference(ctxt);
} else if (CUR == 0) {
unsigned long consumed;
xmlEntityPtr ent;
if (ctxt->inputNr <= 1)
break;
consumed = ctxt->input->consumed;
xmlSaturatedAddSizeT(&consumed,
ctxt->input->cur - ctxt->input->base);
/*
* Add to sizeentities when parsing an external entity
* for the first time.
*/
ent = ctxt->input->entity;
if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((ent->flags & XML_ENT_PARSED) == 0)) {
ent->flags |= XML_ENT_PARSED;
xmlSaturatedAdd(&ctxt->sizeentities, consumed);
}
xmlParserEntityCheck(ctxt, consumed);
xmlPopInput(ctxt);
} else {
break;
}
/*
* Also increase the counter when entering or exiting a PERef.
* The spec says: "When a parameter-entity reference is recognized
* in the DTD and included, its replacement text MUST be enlarged
* by the attachment of one leading and one following space (#x20)
* character."
*/
if (res < INT_MAX)
res++;
}
}
return(res);
}
/************************************************************************
* *
* Commodity functions to handle entities *
* *
************************************************************************/
/**
* xmlPopInput:
* @ctxt: an XML parser context
*
* xmlPopInput: the current input pointed by ctxt->input came to an end
* pop it and return the next char.
*
* Returns the current xmlChar in the parser context
*/
xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt) {
xmlParserInputPtr input;
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"Popping input %d\n", ctxt->inputNr);
if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
(ctxt->instate != XML_PARSER_EOF))
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"Unfinished entity outside the DTD");
input = inputPop(ctxt);
if (input->entity != NULL)
input->entity->flags &= ~XML_ENT_EXPANDING;
xmlFreeInputStream(input);
if (*ctxt->input->cur == 0)
xmlParserGrow(ctxt);
return(CUR);
}
/**
* xmlPushInput:
* @ctxt: an XML parser context
* @input: an XML parser input fragment (entity, XML fragment ...).
*
* xmlPushInput: switch to a new input stream which is stacked on top
* of the previous one(s).
* Returns -1 in case of error or the index in the input stack
*/
int
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
int ret;
if (input == NULL) return(-1);
if (xmlParserDebugEntities) {
if ((ctxt->input != NULL) && (ctxt->input->filename))
xmlGenericError(xmlGenericErrorContext,
"%s(%d): ", ctxt->input->filename,
ctxt->input->line);
xmlGenericError(xmlGenericErrorContext,
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
(ctxt->inputNr > 100)) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
while (ctxt->inputNr > 1)
xmlFreeInputStream(inputPop(ctxt));
return(-1);
}
ret = inputPush(ctxt, input);
if (ctxt->instate == XML_PARSER_EOF)
return(-1);
GROW;
return(ret);
}
/**
* xmlParseCharRef:
* @ctxt: an XML parser context
*
* DEPRECATED: Internal function, don't use.
*
* Parse a numeric character reference. Always consumes '&'.
*
* [66] CharRef ::= '&#' [0-9]+ ';' |
* '&#x' [0-9a-fA-F]+ ';'
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*
* Returns the value parsed (as an int), 0 in case of error
*/
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
int val = 0;
int count = 0;
/*
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
*/
if ((RAW == '&') && (NXT(1) == '#') &&
(NXT(2) == 'x')) {
SKIP(3);
GROW;
while (RAW != ';') { /* loop blocked by count */
if (count++ > 20) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
val = val * 16 + (CUR - 'A') + 10;
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x110000)
val = 0x110000;
NEXT;
count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->input->cur++;
}
} else if ((RAW == '&') && (NXT(1) == '#')) {
SKIP(2);
GROW;
while (RAW != ';') { /* loop blocked by count */
if (count++ > 20) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x110000)
val = 0x110000;
NEXT;
count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->input->cur++;
}
} else {
if (RAW == '&')
SKIP(1);
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
}
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*/
if (val >= 0x110000) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseCharRef: character reference out of bounds\n",
val);
} else if (IS_CHAR(val)) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseCharRef: invalid xmlChar value %d\n",
val);
}
return(0);
}
/**
* xmlParseStringCharRef:
* @ctxt: an XML parser context
* @str: a pointer to an index in the string
*
* parse Reference declarations, variant parsing from a string rather
* than an an input flow.
*
* [66] CharRef ::= '&#' [0-9]+ ';' |
* '&#x' [0-9a-fA-F]+ ';'
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*
* Returns the value parsed (as an int), 0 in case of error, str will be
* updated to the current value of the index
*/
static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
const xmlChar *ptr;
xmlChar cur;
int val = 0;
if ((str == NULL) || (*str == NULL)) return(0);
ptr = *str;
cur = *ptr;
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
ptr += 3;
cur = *ptr;
while (cur != ';') { /* Non input consuming loop */
if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
val = val * 16 + (cur - 'a') + 10;
else if ((cur >= 'A') && (cur <= 'F'))
val = val * 16 + (cur - 'A') + 10;
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x110000)
val = 0x110000;
ptr++;
cur = *ptr;
}
if (cur == ';')
ptr++;
} else if ((cur == '&') && (ptr[1] == '#')){
ptr += 2;
cur = *ptr;
while (cur != ';') { /* Non input consuming loops */
if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x110000)
val = 0x110000;
ptr++;
cur = *ptr;
}
if (cur == ';')
ptr++;
} else {
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
return(0);
}
*str = ptr;
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*/
if (val >= 0x110000) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseStringCharRef: character reference out of bounds\n",
val);
} else if (IS_CHAR(val)) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseStringCharRef: invalid xmlChar value %d\n",
val);
}
return(0);
}
/**
* xmlParserHandlePEReference:
* @ctxt: the parser context
*
* DEPRECATED: Internal function, do not use.
*
* [69] PEReference ::= '%' Name ';'
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
* reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
* subset which contains no parameter entity references, or a document
* with "standalone='yes'", ... ... The declaration of a parameter
* entity must precede any reference to it...
*
* [ VC: Entity Declared ]
* In a document with an external subset or external parameter entities
* with "standalone='no'", ... ... The declaration of a parameter entity
* must precede any reference to it...
*
* [ WFC: In DTD ]
* Parameter-entity references may only appear in the DTD.
* NOTE: misleading but this is handled.
*
* A PEReference may have been detected in the current input stream
* the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
* i.e.
* - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs
*/
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
return;
case XML_PARSER_COMMENT:
return;
case XML_PARSER_START_TAG:
return;
case XML_PARSER_END_TAG:
return;
case XML_PARSER_EOF:
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
return;
case XML_PARSER_PROLOG:
case XML_PARSER_START:
case XML_PARSER_XML_DECL:
case XML_PARSER_MISC:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
return;
case XML_PARSER_ENTITY_DECL:
case XML_PARSER_CONTENT:
case XML_PARSER_ATTRIBUTE_VALUE:
case XML_PARSER_PI:
case XML_PARSER_SYSTEM_LITERAL:
case XML_PARSER_PUBLIC_LITERAL:
/* we just ignore it there */
return;
case XML_PARSER_EPILOG:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
return;
case XML_PARSER_ENTITY_VALUE:
/*
* NOTE: in the case of entity values, we don't do the
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
* This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_DTD:
/*
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
* In the internal DTD subset, parameter-entity references
* can occur only where markup declarations can occur, not
* within markup declarations.
* In that case this is handled in xmlParseMarkupDecl
*/
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
return;
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
return;
break;
case XML_PARSER_IGNORE:
return;
}
xmlParsePEReference(ctxt);
}
/*
* Macro used to grow the current buffer.
* buffer##_size is expected to be a size_t
* mem_error: is expected to handle memory allocation failures
*/
#define growBuffer(buffer, n) { \
xmlChar *tmp; \
size_t new_size = buffer##_size * 2 + n; \
if (new_size < buffer##_size) goto mem_error; \
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
if (tmp == NULL) goto mem_error; \
buffer = tmp; \
buffer##_size = new_size; \
}
/**
* xmlStringDecodeEntitiesInt:
* @ctxt: the parser context
* @str: the input string
* @len: the string length
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
* @check: whether to perform entity checks
*/
static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2, xmlChar end3,
int check) {
xmlChar *buffer = NULL;
size_t buffer_size = 0;
size_t nbchars = 0;
xmlChar *current = NULL;
xmlChar *rep = NULL;
const xmlChar *last;
xmlEntityPtr ent;
int c,l;
if (str == NULL)
return(NULL);
last = str + len;
if (((ctxt->depth > 40) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
(ctxt->depth > 100)) {
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
"Maximum entity nesting depth exceeded");
return(NULL);
}
/*
* allocate a translation buffer.
*/
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
if (buffer == NULL) goto mem_error;
/*
* OK loop until we reach one of the ending char or a size limit.
* we are operating on already parsed values.
*/
if (str < last)
c = CUR_SCHAR(str, l);
else
c = 0;
while ((c != 0) && (c != end) && /* non input consuming loop */
(c != end2) && (c != end3) &&
(ctxt->instate != XML_PARSER_EOF)) {
if (c == 0) break;
if ((c == '&') && (str[1] == '#')) {
int val = xmlParseStringCharRef(ctxt, &str);
if (val == 0)
goto int_error;
COPY_BUF(buffer, nbchars, val);
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"String decoding Entity Reference: %.30s\n",
str);
ent = xmlParseStringEntityRef(ctxt, &str);
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(buffer, nbchars, ent->content[0]);
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
"predefined entity has no content\n");
goto int_error;
}
} else if ((ent != NULL) && (ent->content != NULL)) {
if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
goto int_error;
if (ent->flags & XML_ENT_EXPANDING) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
xmlHaltParser(ctxt);
ent->content[0] = 0;
goto int_error;
}
ent->flags |= XML_ENT_EXPANDING;
ctxt->depth++;
rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
ent->length, what, 0, 0, 0, check);
ctxt->depth--;
ent->flags &= ~XML_ENT_EXPANDING;
if (rep == NULL) {
ent->content[0] = 0;
goto int_error;
}
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
rep = NULL;
} else if (ent != NULL) {
int i = xmlStrlen(ent->name);
const xmlChar *cur = ent->name;
buffer[nbchars++] = '&';
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
}
for (;i > 0;i--)
buffer[nbchars++] = *cur++;
buffer[nbchars++] = ';';
}
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"String decoding PE Reference: %.30s\n", str);
ent = xmlParseStringPEReference(ctxt, &str);
if (ent != NULL) {
if (ent->content == NULL) {
/*
* Note: external parsed entities will not be loaded,
* it is not required for a non-validating parser to
* complete external PEReferences coming from the
* internal subset
*/
if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
(ctxt->validate != 0)) {
xmlLoadEntityContent(ctxt, ent);
} else {
xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
"not validating will not read content for PE entity %s\n",
ent->name, NULL);
}
}
if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
goto int_error;
if (ent->flags & XML_ENT_EXPANDING) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
xmlHaltParser(ctxt);
if (ent->content != NULL)
ent->content[0] = 0;
goto int_error;
}
ent->flags |= XML_ENT_EXPANDING;
ctxt->depth++;
rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
ent->length, what, 0, 0, 0, check);
ctxt->depth--;
ent->flags &= ~XML_ENT_EXPANDING;
if (rep == NULL) {
if (ent->content != NULL)
ent->content[0] = 0;
goto int_error;
}
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
rep = NULL;
}
} else {
COPY_BUF(buffer, nbchars, c);
str += l;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
if (str < last)
c = CUR_SCHAR(str, l);
else
c = 0;
}
buffer[nbchars] = 0;
return(buffer);
mem_error:
xmlErrMemory(ctxt, NULL);
int_error:
if (rep != NULL)
xmlFree(rep);
if (buffer != NULL)
xmlFree(buffer);
return(NULL);
}
/**
* xmlStringLenDecodeEntities:
* @ctxt: the parser context
* @str: the input string
* @len: the string length
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
* DEPRECATED: Internal function, don't use.
*
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
*
* Returns A newly allocated string with the substitution done. The caller
* must deallocate it !
*/
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2,
xmlChar end3) {
if ((ctxt == NULL) || (str == NULL) || (len < 0))
return(NULL);
return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
end, end2, end3, 0));
}
/**
* xmlStringDecodeEntities:
* @ctxt: the parser context
* @str: the input string
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
* DEPRECATED: Internal function, don't use.
*
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
*
* Returns A newly allocated string with the substitution done. The caller
* must deallocate it !
*/
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
xmlChar end, xmlChar end2, xmlChar end3) {
if ((ctxt == NULL) || (str == NULL)) return(NULL);
return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
end, end2, end3, 0));
}
/************************************************************************
* *
* Commodity functions, cleanup needed ? *
* *
************************************************************************/
/**
* areBlanks:
* @ctxt: an XML parser context
* @str: a xmlChar *
* @len: the size of @str
* @blank_chars: we know the chars are blanks
*
* Is this a sequence of blank chars that one can ignore ?
*
* Returns 1 if ignorable 0 otherwise.
*/
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int blank_chars) {
int i, ret;
xmlNodePtr lastChild;
/*
* Don't spend time trying to differentiate them, the same callback is
* used !
*/
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)