/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
| |
| #define IN_LIBXML |
| #include "libxml.h" |
| |
| #if defined(WIN32) && !defined (__CYGWIN__) |
| #define XML_DIR_SEP '\\' |
| #else |
| #define XML_DIR_SEP '/' |
| #endif |
| |
| #include <stdlib.h> |
| #include <limits.h> |
| #include <string.h> |
| #include <stdarg.h> |
| #include <libxml/xmlmemory.h> |
| #include <libxml/threads.h> |
| #include <libxml/globals.h> |
| #include <libxml/tree.h> |
| #include <libxml/parser.h> |
| #include <libxml/parserInternals.h> |
| #include <libxml/valid.h> |
| #include <libxml/entities.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/encoding.h> |
| #include <libxml/xmlIO.h> |
| #include <libxml/uri.h> |
| #ifdef LIBXML_CATALOG_ENABLED |
| #include <libxml/catalog.h> |
| #endif |
| #ifdef LIBXML_SCHEMAS_ENABLED |
| #include <libxml/xmlschemastypes.h> |
| #include <libxml/relaxng.h> |
| #endif |
| #ifdef HAVE_CTYPE_H |
| #include <ctype.h> |
| #endif |
| #ifdef HAVE_STDLIB_H |
| #include <stdlib.h> |
| #endif |
| #ifdef HAVE_SYS_STAT_H |
| #include <sys/stat.h> |
| #endif |
| #ifdef HAVE_FCNTL_H |
| #include <fcntl.h> |
| #endif |
| #ifdef HAVE_UNISTD_H |
| #include <unistd.h> |
| #endif |
| #ifdef HAVE_ZLIB_H |
| #include <zlib.h> |
| #endif |
| #ifdef HAVE_LZMA_H |
| #include <lzma.h> |
| #endif |
| |
| #include "buf.h" |
| #include "enc.h" |
| |
| static void |
| xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); |
| |
| static xmlParserCtxtPtr |
| xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, |
| const xmlChar *base, xmlParserCtxtPtr pctx); |
| |
| static void xmlHaltParser(xmlParserCtxtPtr ctxt); |
| |
| /************************************************************************ |
| * * |
| * Arbitrary limits set in the parser. See XML_PARSE_HUGE * |
| * * |
| ************************************************************************/ |
| |
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR   10
| |
| /* |
| * xmlParserEntityCheck |
| * |
| * Function to check non-linear entity expansion behaviour |
| * This is here to detect and stop exponential linear entity expansion |
| * This is not a limitation of the parser but a safety |
| * boundary feature. It can be disabled with the XML_PARSE_HUGE |
| * parser option. |
| */ |
| static int |
| xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, |
| xmlEntityPtr ent, size_t replacement) |
| { |
| size_t consumed = 0; |
| |
| if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) |
| return (0); |
| if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) |
| return (1); |
| |
| /* |
| * This may look absurd but is needed to detect |
| * entities problems |
| */ |
| if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
| (ent->content != NULL) && (ent->checked == 0)) { |
| unsigned long oldnbent = ctxt->nbentities; |
| xmlChar *rep; |
| |
| ent->checked = 1; |
| |
| ++ctxt->depth; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, |
| XML_SUBSTITUTE_REF, 0, 0, 0); |
| --ctxt->depth; |
| |
| ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; |
| if (rep != NULL) { |
| if (xmlStrchr(rep, '<')) |
| ent->checked |= 1; |
| xmlFree(rep); |
| rep = NULL; |
| } |
| } |
| if (replacement != 0) { |
| if (replacement < XML_MAX_TEXT_LENGTH) |
| return(0); |
| |
| /* |
| * If the volume of entity copy reaches 10 times the |
| * amount of parsed data and over the large text threshold |
| * then that's very likely to be an abuse. |
| */ |
| if (ctxt->input != NULL) { |
| consumed = ctxt->input->consumed + |
| (ctxt->input->cur - ctxt->input->base); |
| } |
| consumed += ctxt->sizeentities; |
| |
| if (replacement < XML_PARSER_NON_LINEAR * consumed) |
| return(0); |
| } else if (size != 0) { |
| /* |
| * Do the check based on the replacement size of the entity |
| */ |
| if (size < XML_PARSER_BIG_ENTITY) |
| return(0); |
| |
| /* |
| * A limit on the amount of text data reasonably used |
| */ |
| if (ctxt->input != NULL) { |
| consumed = ctxt->input->consumed + |
| (ctxt->input->cur - ctxt->input->base); |
| } |
| consumed += ctxt->sizeentities; |
| |
| if ((size < XML_PARSER_NON_LINEAR * consumed) && |
| (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) |
| return (0); |
| } else if (ent != NULL) { |
| /* |
| * use the number of parsed entities in the replacement |
| */ |
| size = ent->checked / 2; |
| |
| /* |
| * The amount of data parsed counting entities size only once |
| */ |
| if (ctxt->input != NULL) { |
| consumed = ctxt->input->consumed + |
| (ctxt->input->cur - ctxt->input->base); |
| } |
| consumed += ctxt->sizeentities; |
| |
| /* |
| * Check the density of entities for the amount of data |
| * knowing an entity reference will take at least 3 bytes |
| */ |
| if (size * 3 < consumed * XML_PARSER_NON_LINEAR) |
| return (0); |
| } else { |
| /* |
| * strange we got no data for checking |
| */ |
| if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && |
| (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || |
| (ctxt->nbentities <= 10000)) |
| return (0); |
| } |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
| return (1); |
| } |
| |
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
| |
| |
| |
#define SAX2                        1
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the input's available characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE       100
| |
/*
 * NULL-terminated list of the "xml"-prefixed PI targets allowed by
 * W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
| |
| |
| /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ |
| static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, |
| const xmlChar **str); |
| |
| static xmlParserErrors |
| xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
| xmlSAXHandlerPtr sax, |
| void *user_data, int depth, const xmlChar *URL, |
| const xmlChar *ID, xmlNodePtr *list); |
| |
| static int |
| xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, |
| const char *encoding); |
| #ifdef LIBXML_LEGACY_ENABLED |
| static void |
| xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, |
| xmlNodePtr lastNode); |
| #endif /* LIBXML_LEGACY_ENABLED */ |
| |
| static xmlParserErrors |
| xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
| const xmlChar *string, void *user_data, xmlNodePtr *lst); |
| |
| static int |
| xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); |
| |
| /************************************************************************ |
| * * |
| * Some factorized error routines * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlErrAttributeDup: |
| * @ctxt: an XML parser context |
| * @prefix: the attribute prefix |
| * @localname: the attribute localname |
| * |
| * Handle a redefinition of attribute error |
| */ |
| static void |
| xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, |
| const xmlChar * localname) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; |
| |
| if (prefix == NULL) |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
| XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
| (const char *) localname, NULL, NULL, 0, 0, |
| "Attribute %s redefined\n", localname); |
| else |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
| XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
| (const char *) prefix, (const char *) localname, |
| NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, |
| localname); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlFatalErr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @extra: extra information string |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) |
| { |
| const char *errmsg; |
| char errstr[129] = ""; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| switch (error) { |
| case XML_ERR_INVALID_HEX_CHARREF: |
| errmsg = "CharRef: invalid hexadecimal value"; |
| break; |
| case XML_ERR_INVALID_DEC_CHARREF: |
| errmsg = "CharRef: invalid decimal value"; |
| break; |
| case XML_ERR_INVALID_CHARREF: |
| errmsg = "CharRef: invalid value"; |
| break; |
| case XML_ERR_INTERNAL_ERROR: |
| errmsg = "internal error"; |
| break; |
| case XML_ERR_PEREF_AT_EOF: |
| errmsg = "PEReference at end of document"; |
| break; |
| case XML_ERR_PEREF_IN_PROLOG: |
| errmsg = "PEReference in prolog"; |
| break; |
| case XML_ERR_PEREF_IN_EPILOG: |
| errmsg = "PEReference in epilog"; |
| break; |
| case XML_ERR_PEREF_NO_NAME: |
| errmsg = "PEReference: no name"; |
| break; |
| case XML_ERR_PEREF_SEMICOL_MISSING: |
| errmsg = "PEReference: expecting ';'"; |
| break; |
| case XML_ERR_ENTITY_LOOP: |
| errmsg = "Detected an entity reference loop"; |
| break; |
| case XML_ERR_ENTITY_NOT_STARTED: |
| errmsg = "EntityValue: \" or ' expected"; |
| break; |
| case XML_ERR_ENTITY_PE_INTERNAL: |
| errmsg = "PEReferences forbidden in internal subset"; |
| break; |
| case XML_ERR_ENTITY_NOT_FINISHED: |
| errmsg = "EntityValue: \" or ' expected"; |
| break; |
| case XML_ERR_ATTRIBUTE_NOT_STARTED: |
| errmsg = "AttValue: \" or ' expected"; |
| break; |
| case XML_ERR_LT_IN_ATTRIBUTE: |
| errmsg = "Unescaped '<' not allowed in attributes values"; |
| break; |
| case XML_ERR_LITERAL_NOT_STARTED: |
| errmsg = "SystemLiteral \" or ' expected"; |
| break; |
| case XML_ERR_LITERAL_NOT_FINISHED: |
| errmsg = "Unfinished System or Public ID \" or ' expected"; |
| break; |
| case XML_ERR_MISPLACED_CDATA_END: |
| errmsg = "Sequence ']]>' not allowed in content"; |
| break; |
| case XML_ERR_URI_REQUIRED: |
| errmsg = "SYSTEM or PUBLIC, the URI is missing"; |
| break; |
| case XML_ERR_PUBID_REQUIRED: |
| errmsg = "PUBLIC, the Public Identifier is missing"; |
| break; |
| case XML_ERR_HYPHEN_IN_COMMENT: |
| errmsg = "Comment must not contain '--' (double-hyphen)"; |
| break; |
| case XML_ERR_PI_NOT_STARTED: |
| errmsg = "xmlParsePI : no target name"; |
| break; |
| case XML_ERR_RESERVED_XML_NAME: |
| errmsg = "Invalid PI name"; |
| break; |
| case XML_ERR_NOTATION_NOT_STARTED: |
| errmsg = "NOTATION: Name expected here"; |
| break; |
| case XML_ERR_NOTATION_NOT_FINISHED: |
| errmsg = "'>' required to close NOTATION declaration"; |
| break; |
| case XML_ERR_VALUE_REQUIRED: |
| errmsg = "Entity value required"; |
| break; |
| case XML_ERR_URI_FRAGMENT: |
| errmsg = "Fragment not allowed"; |
| break; |
| case XML_ERR_ATTLIST_NOT_STARTED: |
| errmsg = "'(' required to start ATTLIST enumeration"; |
| break; |
| case XML_ERR_NMTOKEN_REQUIRED: |
| errmsg = "NmToken expected in ATTLIST enumeration"; |
| break; |
| case XML_ERR_ATTLIST_NOT_FINISHED: |
| errmsg = "')' required to finish ATTLIST enumeration"; |
| break; |
| case XML_ERR_MIXED_NOT_STARTED: |
| errmsg = "MixedContentDecl : '|' or ')*' expected"; |
| break; |
| case XML_ERR_PCDATA_REQUIRED: |
| errmsg = "MixedContentDecl : '#PCDATA' expected"; |
| break; |
| case XML_ERR_ELEMCONTENT_NOT_STARTED: |
| errmsg = "ContentDecl : Name or '(' expected"; |
| break; |
| case XML_ERR_ELEMCONTENT_NOT_FINISHED: |
| errmsg = "ContentDecl : ',' '|' or ')' expected"; |
| break; |
| case XML_ERR_PEREF_IN_INT_SUBSET: |
| errmsg = |
| "PEReference: forbidden within markup decl in internal subset"; |
| break; |
| case XML_ERR_GT_REQUIRED: |
| errmsg = "expected '>'"; |
| break; |
| case XML_ERR_CONDSEC_INVALID: |
| errmsg = "XML conditional section '[' expected"; |
| break; |
| case XML_ERR_EXT_SUBSET_NOT_FINISHED: |
| errmsg = "Content error in the external subset"; |
| break; |
| case XML_ERR_CONDSEC_INVALID_KEYWORD: |
| errmsg = |
| "conditional section INCLUDE or IGNORE keyword expected"; |
| break; |
| case XML_ERR_CONDSEC_NOT_FINISHED: |
| errmsg = "XML conditional section not closed"; |
| break; |
| case XML_ERR_XMLDECL_NOT_STARTED: |
| errmsg = "Text declaration '<?xml' required"; |
| break; |
| case XML_ERR_XMLDECL_NOT_FINISHED: |
| errmsg = "parsing XML declaration: '?>' expected"; |
| break; |
| case XML_ERR_EXT_ENTITY_STANDALONE: |
| errmsg = "external parsed entities cannot be standalone"; |
| break; |
| case XML_ERR_ENTITYREF_SEMICOL_MISSING: |
| errmsg = "EntityRef: expecting ';'"; |
| break; |
| case XML_ERR_DOCTYPE_NOT_FINISHED: |
| errmsg = "DOCTYPE improperly terminated"; |
| break; |
| case XML_ERR_LTSLASH_REQUIRED: |
| errmsg = "EndTag: '</' not found"; |
| break; |
| case XML_ERR_EQUAL_REQUIRED: |
| errmsg = "expected '='"; |
| break; |
| case XML_ERR_STRING_NOT_CLOSED: |
| errmsg = "String not closed expecting \" or '"; |
| break; |
| case XML_ERR_STRING_NOT_STARTED: |
| errmsg = "String not started expecting ' or \""; |
| break; |
| case XML_ERR_ENCODING_NAME: |
| errmsg = "Invalid XML encoding name"; |
| break; |
| case XML_ERR_STANDALONE_VALUE: |
| errmsg = "standalone accepts only 'yes' or 'no'"; |
| break; |
| case XML_ERR_DOCUMENT_EMPTY: |
| errmsg = "Document is empty"; |
| break; |
| case XML_ERR_DOCUMENT_END: |
| errmsg = "Extra content at the end of the document"; |
| break; |
| case XML_ERR_NOT_WELL_BALANCED: |
| errmsg = "chunk is not well balanced"; |
| break; |
| case XML_ERR_EXTRA_CONTENT: |
| errmsg = "extra content at the end of well balanced chunk"; |
| break; |
| case XML_ERR_VERSION_MISSING: |
| errmsg = "Malformed declaration expecting version"; |
| break; |
| case XML_ERR_NAME_TOO_LONG: |
| errmsg = "Name too long use XML_PARSE_HUGE option"; |
| break; |
| #if 0 |
| case: |
| errmsg = ""; |
| break; |
| #endif |
| default: |
| errmsg = "Unregistered error message"; |
| } |
| if (info == NULL) |
| snprintf(errstr, 128, "%s\n", errmsg); |
| else |
| snprintf(errstr, 128, "%s: %%s\n", errmsg); |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0], |
| info); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlFatalErrMsg: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlWarningMsg: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: extra data |
| * @str2: extra data |
| * |
| * Handle a warning. |
| */ |
| static void |
| xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1, const xmlChar *str2) |
| { |
| xmlStructuredErrorFunc schannel = NULL; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if ((ctxt != NULL) && (ctxt->sax != NULL) && |
| (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
| schannel = ctxt->sax->serror; |
| if (ctxt != NULL) { |
| __xmlRaiseError(schannel, |
| (ctxt->sax) ? ctxt->sax->warning : NULL, |
| ctxt->userData, |
| ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_WARNING, NULL, 0, |
| (const char *) str1, (const char *) str2, NULL, 0, 0, |
| msg, (const char *) str1, (const char *) str2); |
| } else { |
| __xmlRaiseError(schannel, NULL, NULL, |
| ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_WARNING, NULL, 0, |
| (const char *) str1, (const char *) str2, NULL, 0, 0, |
| msg, (const char *) str1, (const char *) str2); |
| } |
| } |
| |
| /** |
| * xmlValidityError: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: extra data |
| * |
| * Handle a validity error. |
| */ |
| static void |
| xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1, const xmlChar *str2) |
| { |
| xmlStructuredErrorFunc schannel = NULL; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) { |
| ctxt->errNo = error; |
| if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
| schannel = ctxt->sax->serror; |
| } |
| if (ctxt != NULL) { |
| __xmlRaiseError(schannel, |
| ctxt->vctxt.error, ctxt->vctxt.userData, |
| ctxt, NULL, XML_FROM_DTD, error, |
| XML_ERR_ERROR, NULL, 0, (const char *) str1, |
| (const char *) str2, NULL, 0, 0, |
| msg, (const char *) str1, (const char *) str2); |
| ctxt->valid = 0; |
| } else { |
| __xmlRaiseError(schannel, NULL, NULL, |
| ctxt, NULL, XML_FROM_DTD, error, |
| XML_ERR_ERROR, NULL, 0, (const char *) str1, |
| (const char *) str2, NULL, 0, 0, |
| msg, (const char *) str1, (const char *) str2); |
| } |
| } |
| |
| /** |
| * xmlFatalErrMsgInt: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: an integer value |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, int val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, |
| ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, NULL, NULL, NULL, val, 0, msg, val); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlFatalErrMsgStrIntStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: an string info |
| * @val: an integer value |
| * @str2: an string info |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1, int val, |
| const xmlChar *str2) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, |
| ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, (const char *) str1, (const char *) str2, |
| NULL, val, 0, msg, str1, val, str2); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlFatalErrMsgStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: a string value |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar * val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
| XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
| val); |
| if (ctxt != NULL) { |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| } |
| |
| /** |
| * xmlErrMsgStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: a string value |
| * |
| * Handle a non fatal parser error |
| */ |
| static void |
| xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar * val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
| XML_FROM_PARSER, error, XML_ERR_ERROR, |
| NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
| val); |
| } |
| |
| /** |
| * xmlNsErr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the message |
| * @info1: extra information string |
| * @info2: extra information string |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, |
| const xmlChar * info1, const xmlChar * info2, |
| const xmlChar * info3) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| if (ctxt != NULL) |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
| XML_ERR_ERROR, NULL, 0, (const char *) info1, |
| (const char *) info2, (const char *) info3, 0, 0, msg, |
| info1, info2, info3); |
| if (ctxt != NULL) |
| ctxt->nsWellFormed = 0; |
| } |
| |
| /** |
| * xmlNsWarn |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the message |
| * @info1: extra information string |
| * @info2: extra information string |
| * |
| * Handle a namespace warning error |
| */ |
| static void |
| xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, |
| const xmlChar * info1, const xmlChar * info2, |
| const xmlChar * info3) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
| XML_ERR_WARNING, NULL, 0, (const char *) info1, |
| (const char *) info2, (const char *) info3, 0, 0, msg, |
| info1, info2, info3); |
| } |
| |
| /************************************************************************ |
| * * |
| * Library wide options * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlHasFeature: |
| * @feature: the feature to be examined |
| * |
| * Examines if the library has been compiled with a given feature. |
| * |
| * Returns a non-zero value if the feature exist, otherwise zero. |
| * Returns zero (0) if the feature does not exist or an unknown |
| * unknown feature is requested, non-zero otherwise. |
| */ |
| int |
| xmlHasFeature(xmlFeature feature) |
| { |
| switch (feature) { |
| case XML_WITH_THREAD: |
| #ifdef LIBXML_THREAD_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_TREE: |
| #ifdef LIBXML_TREE_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_OUTPUT: |
| #ifdef LIBXML_OUTPUT_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_PUSH: |
| #ifdef LIBXML_PUSH_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_READER: |
| #ifdef LIBXML_READER_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_PATTERN: |
| #ifdef LIBXML_PATTERN_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_WRITER: |
| #ifdef LIBXML_WRITER_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_SAX1: |
| #ifdef LIBXML_SAX1_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_FTP: |
| #ifdef LIBXML_FTP_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_HTTP: |
| #ifdef LIBXML_HTTP_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_VALID: |
| #ifdef LIBXML_VALID_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_HTML: |
| #ifdef LIBXML_HTML_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_LEGACY: |
| #ifdef LIBXML_LEGACY_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_C14N: |
| #ifdef LIBXML_C14N_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_CATALOG: |
| #ifdef LIBXML_CATALOG_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_XPATH: |
| #ifdef LIBXML_XPATH_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_XPTR: |
| #ifdef LIBXML_XPTR_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_XINCLUDE: |
| #ifdef LIBXML_XINCLUDE_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_ICONV: |
| #ifdef LIBXML_ICONV_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_ISO8859X: |
| #ifdef LIBXML_ISO8859X_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_UNICODE: |
| #ifdef LIBXML_UNICODE_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_REGEXP: |
| #ifdef LIBXML_REGEXP_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_AUTOMATA: |
| #ifdef LIBXML_AUTOMATA_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_EXPR: |
| #ifdef LIBXML_EXPR_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_SCHEMAS: |
| #ifdef LIBXML_SCHEMAS_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_SCHEMATRON: |
| #ifdef LIBXML_SCHEMATRON_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_MODULES: |
| #ifdef LIBXML_MODULES_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_DEBUG: |
| #ifdef LIBXML_DEBUG_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_DEBUG_MEM: |
| #ifdef DEBUG_MEMORY_LOCATION |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_DEBUG_RUN: |
| #ifdef LIBXML_DEBUG_RUNTIME |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_ZLIB: |
| #ifdef LIBXML_ZLIB_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_LZMA: |
| #ifdef LIBXML_LZMA_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| case XML_WITH_ICU: |
| #ifdef LIBXML_ICU_ENABLED |
| return(1); |
| #else |
| return(0); |
| #endif |
| default: |
| break; |
| } |
| return(0); |
| } |
| |
| /************************************************************************ |
| * * |
| * SAX2 defaulted attributes handling * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlDetectSAX2: |
| * @ctxt: an XML parser context |
| * |
| * Do the SAX2 detection and specific intialization |
| */ |
| static void |
| xmlDetectSAX2(xmlParserCtxtPtr ctxt) { |
| if (ctxt == NULL) return; |
| #ifdef LIBXML_SAX1_ENABLED |
| if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && |
| ((ctxt->sax->startElementNs != NULL) || |
| (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; |
| #else |
| ctxt->sax2 = 1; |
| #endif /* LIBXML_SAX1_ENABLED */ |
| |
| ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); |
| ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); |
| ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); |
| if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || |
| (ctxt->str_xml_ns == NULL)) { |
| xmlErrMemory(ctxt, NULL); |
| } |
| } |
| |
| typedef struct _xmlDefAttrs xmlDefAttrs; |
| typedef xmlDefAttrs *xmlDefAttrsPtr; |
| struct _xmlDefAttrs { |
| int nbAttrs; /* number of defaulted attributes on that element */ |
| int maxAttrs; /* the size of the array */ |
| const xmlChar *values[5]; /* array of localname/prefix/values/external */ |
| }; |
| |
| /** |
| * xmlAttrNormalizeSpace: |
| * @src: the source string |
| * @dst: the target string |
| * |
| * Normalize the space in non CDATA attribute values: |
| * If the attribute type is not CDATA, then the XML processor MUST further |
| * process the normalized attribute value by discarding any leading and |
| * trailing space (#x20) characters, and by replacing sequences of space |
| * (#x20) characters by a single space (#x20) character. |
| * Note that the size of dst need to be at least src, and if one doesn't need |
| * to preserve dst (and it doesn't come from a dictionary or read-only) then |
| * passing src as dst is just fine. |
| * |
| * Returns a pointer to the normalized value (dst) or NULL if no conversion |
| * is needed. |
| */ |
| static xmlChar * |
| xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) |
| { |
| if ((src == NULL) || (dst == NULL)) |
| return(NULL); |
| |
| while (*src == 0x20) src++; |
| while (*src != 0) { |
| if (*src == 0x20) { |
| while (*src == 0x20) src++; |
| if (*src != 0) |
| *dst++ = 0x20; |
| } else { |
| *dst++ = *src++; |
| } |
| } |
| *dst = 0; |
| if (dst == src) |
| return(NULL); |
| return(dst); |
| } |
| |
| /** |
| * xmlAttrNormalizeSpace2: |
| * @src: the source string |
| * |
| * Normalize the space in non CDATA attribute values, a slightly more complex |
| * front end to avoid allocation problems when running on attribute values |
| * coming from the input. |
| * |
| * Returns a pointer to the normalized value (dst) or NULL if no conversion |
| * is needed. |
| */ |
| static const xmlChar * |
| xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) |
| { |
| int i; |
| int remove_head = 0; |
| int need_realloc = 0; |
| const xmlChar *cur; |
| |
| if ((ctxt == NULL) || (src == NULL) || (len == NULL)) |
| return(NULL); |
| i = *len; |
| if (i <= 0) |
| return(NULL); |
| |
| cur = src; |
| while (*cur == 0x20) { |
| cur++; |
| remove_head++; |
| } |
| while (*cur != 0) { |
| if (*cur == 0x20) { |
| cur++; |
| if ((*cur == 0x20) || (*cur == 0)) { |
| need_realloc = 1; |
| break; |
| } |
| } else |
| cur++; |
| } |
| if (need_realloc) { |
| xmlChar *ret; |
| |
| ret = xmlStrndup(src + remove_head, i - remove_head + 1); |
| if (ret == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| xmlAttrNormalizeSpace(ret, ret); |
| *len = (int) strlen((const char *)ret); |
| return(ret); |
| } else if (remove_head) { |
| *len -= remove_head; |
| memmove(src, src + remove_head, 1 + *len); |
| return(src); |
| } |
| return(NULL); |
| } |
| |
| /** |
| * xmlAddDefAttrs: |
| * @ctxt: an XML parser context |
| * @fullname: the element fullname |
| * @fullattr: the attribute fullname |
| * @value: the attribute value |
| * |
| * Add a defaulted attribute for an element |
| */ |
| static void |
| xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
| const xmlChar *fullname, |
| const xmlChar *fullattr, |
| const xmlChar *value) { |
| xmlDefAttrsPtr defaults; |
| int len; |
| const xmlChar *name; |
| const xmlChar *prefix; |
| |
| /* |
| * Allows to detect attribute redefinitions |
| */ |
| if (ctxt->attsSpecial != NULL) { |
| if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) |
| return; |
| } |
| |
| if (ctxt->attsDefault == NULL) { |
| ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); |
| if (ctxt->attsDefault == NULL) |
| goto mem_error; |
| } |
| |
| /* |
| * split the element name into prefix:localname , the string found |
| * are within the DTD and then not associated to namespace names. |
| */ |
| name = xmlSplitQName3(fullname, &len); |
| if (name == NULL) { |
| name = xmlDictLookup(ctxt->dict, fullname, -1); |
| prefix = NULL; |
| } else { |
| name = xmlDictLookup(ctxt->dict, name, -1); |
| prefix = xmlDictLookup(ctxt->dict, fullname, len); |
| } |
| |
| /* |
| * make sure there is some storage |
| */ |
| defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); |
| if (defaults == NULL) { |
| defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + |
| (4 * 5) * sizeof(const xmlChar *)); |
| if (defaults == NULL) |
| goto mem_error; |
| defaults->nbAttrs = 0; |
| defaults->maxAttrs = 4; |
| if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, |
| defaults, NULL) < 0) { |
| xmlFree(defaults); |
| goto mem_error; |
| } |
| } else if (defaults->nbAttrs >= defaults->maxAttrs) { |
| xmlDefAttrsPtr temp; |
| |
| temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + |
| (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); |
| if (temp == NULL) |
| goto mem_error; |
| defaults = temp; |
| defaults->maxAttrs *= 2; |
| if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, |
| defaults, NULL) < 0) { |
| xmlFree(defaults); |
| goto mem_error; |
| } |
| } |
| |
| /* |
| * Split the element name into prefix:localname , the string found |
| * are within the DTD and hen not associated to namespace names. |
| */ |
| name = xmlSplitQName3(fullattr, &len); |
| if (name == NULL) { |
| name = xmlDictLookup(ctxt->dict, fullattr, -1); |
| prefix = NULL; |
| } else { |
| name = xmlDictLookup(ctxt->dict, name, -1); |
| prefix = xmlDictLookup(ctxt->dict, fullattr, len); |
| } |
| |
| defaults->values[5 * defaults->nbAttrs] = name; |
| defaults->values[5 * defaults->nbAttrs + 1] = prefix; |
| /* intern the string and precompute the end */ |
| len = xmlStrlen(value); |
| value = xmlDictLookup(ctxt->dict, value, len); |
| defaults->values[5 * defaults->nbAttrs + 2] = value; |
| defaults->values[5 * defaults->nbAttrs + 3] = value + len; |
| if (ctxt->external) |
| defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; |
| else |
| defaults->values[5 * defaults->nbAttrs + 4] = NULL; |
| defaults->nbAttrs++; |
| |
| return; |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return; |
| } |
| |
| /** |
| * xmlAddSpecialAttr: |
| * @ctxt: an XML parser context |
| * @fullname: the element fullname |
| * @fullattr: the attribute fullname |
| * @type: the attribute type |
| * |
| * Register this attribute type |
| */ |
| static void |
| xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, |
| const xmlChar *fullname, |
| const xmlChar *fullattr, |
| int type) |
| { |
| if (ctxt->attsSpecial == NULL) { |
| ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); |
| if (ctxt->attsSpecial == NULL) |
| goto mem_error; |
| } |
| |
| if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) |
| return; |
| |
| xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, |
| (void *) (long) type); |
| return; |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return; |
| } |
| |
| /** |
| * xmlCleanSpecialAttrCallback: |
| * |
| * Removes CDATA attributes from the special attribute table |
| */ |
| static void |
| xmlCleanSpecialAttrCallback(void *payload, void *data, |
| const xmlChar *fullname, const xmlChar *fullattr, |
| const xmlChar *unused ATTRIBUTE_UNUSED) { |
| xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; |
| |
| if (((long) payload) == XML_ATTRIBUTE_CDATA) { |
| xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); |
| } |
| } |
| |
| /** |
| * xmlCleanSpecialAttr: |
| * @ctxt: an XML parser context |
| * |
| * Trim the list of attributes defined to remove all those of type |
| * CDATA as they are not special. This call should be done when finishing |
| * to parse the DTD and before starting to parse the document root. |
| */ |
| static void |
| xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) |
| { |
| if (ctxt->attsSpecial == NULL) |
| return; |
| |
| xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); |
| |
| if (xmlHashSize(ctxt->attsSpecial) == 0) { |
| xmlHashFree(ctxt->attsSpecial, NULL); |
| ctxt->attsSpecial = NULL; |
| } |
| return; |
| } |
| |
| /** |
| * xmlCheckLanguageID: |
| * @lang: pointer to the string value |
| * |
| * Checks that the value conforms to the LanguageID production: |
| * |
| * NOTE: this is somewhat deprecated, those productions were removed from |
| * the XML Second edition. |
| * |
| * [33] LanguageID ::= Langcode ('-' Subcode)* |
| * [34] Langcode ::= ISO639Code | IanaCode | UserCode |
| * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) |
| * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ |
| * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ |
| * [38] Subcode ::= ([a-z] | [A-Z])+ |
| * |
| * The current REC reference the sucessors of RFC 1766, currently 5646 |
| * |
| * http://www.rfc-editor.org/rfc/rfc5646.txt |
| * langtag = language |
| * ["-" script] |
| * ["-" region] |
| * *("-" variant) |
| * *("-" extension) |
| * ["-" privateuse] |
| * language = 2*3ALPHA ; shortest ISO 639 code |
| * ["-" extlang] ; sometimes followed by |
| * ; extended language subtags |
| * / 4ALPHA ; or reserved for future use |
| * / 5*8ALPHA ; or registered language subtag |
| * |
| * extlang = 3ALPHA ; selected ISO 639 codes |
| * *2("-" 3ALPHA) ; permanently reserved |
| * |
| * script = 4ALPHA ; ISO 15924 code |
| * |
| * region = 2ALPHA ; ISO 3166-1 code |
| * / 3DIGIT ; UN M.49 code |
| * |
| * variant = 5*8alphanum ; registered variants |
| * / (DIGIT 3alphanum) |
| * |
| * extension = singleton 1*("-" (2*8alphanum)) |
| * |
| * ; Single alphanumerics |
| * ; "x" reserved for private use |
| * singleton = DIGIT ; 0 - 9 |
| * / %x41-57 ; A - W |
| * / %x59-5A ; Y - Z |
| * / %x61-77 ; a - w |
| * / %x79-7A ; y - z |
| * |
| * it sounds right to still allow Irregular i-xxx IANA and user codes too |
| * The parser below doesn't try to cope with extension or privateuse |
| * that could be added but that's not interoperable anyway |
| * |
| * Returns 1 if correct 0 otherwise |
| **/ |
| int |
| xmlCheckLanguageID(const xmlChar * lang) |
| { |
| const xmlChar *cur = lang, *nxt; |
| |
| if (cur == NULL) |
| return (0); |
| if (((cur[0] == 'i') && (cur[1] == '-')) || |
| ((cur[0] == 'I') && (cur[1] == '-')) || |
| ((cur[0] == 'x') && (cur[1] == '-')) || |
| ((cur[0] == 'X') && (cur[1] == '-'))) { |
| /* |
| * Still allow IANA code and user code which were coming |
| * from the previous version of the XML-1.0 specification |
| * it's deprecated but we should not fail |
| */ |
| cur += 2; |
| while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || |
| ((cur[0] >= 'a') && (cur[0] <= 'z'))) |
| cur++; |
| return(cur[0] == 0); |
| } |
| nxt = cur; |
| while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || |
| ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) |
| nxt++; |
| if (nxt - cur >= 4) { |
| /* |
| * Reserved |
| */ |
| if ((nxt - cur > 8) || (nxt[0] != 0)) |
| return(0); |
| return(1); |
| } |
| if (nxt - cur < 2) |
| return(0); |
| /* we got an ISO 639 code */ |
| if (nxt[0] == 0) |
| return(1); |
| if (nxt[0] != '-') |
| return(0); |
| |
| nxt++; |
| cur = nxt; |
| /* now we can have extlang or script or region or variant */ |
| if ((nxt[0] >= '0') && (nxt[0] <= '9')) |
| goto region_m49; |
| |
| while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || |
| ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) |
| nxt++; |
| if (nxt - cur == 4) |
| goto script; |
| if (nxt - cur == 2) |
| goto region; |
| if ((nxt - cur >= 5) && (nxt - cur <= 8)) |
| goto variant; |
| if (nxt - cur != 3) |
| return(0); |
| /* we parsed an extlang */ |
| if (nxt[0] == 0) |
| return(1); |
| if (nxt[0] != '-') |
| return(0); |
| |
| nxt++; |
| cur = nxt; |
| /* now we can have script or region or variant */ |
| if ((nxt[0] >= '0') && (nxt[0] <= '9')) |
| goto region_m49; |
| |
| while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || |
| ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) |
| nxt++; |
| if (nxt - cur == 2) |
| goto region; |
| if ((nxt - cur >= 5) && (nxt - cur <= 8)) |
| goto variant; |
| if (nxt - cur != 4) |
| return(0); |
| /* we parsed a script */ |
| script: |
| if (nxt[0] == 0) |
| return(1); |
| if (nxt[0] != '-') |
| return(0); |
| |
| nxt++; |
| cur = nxt; |
| /* now we can have region or variant */ |
| if ((nxt[0] >= '0') && (nxt[0] <= '9')) |
| goto region_m49; |
| |
| while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || |
| ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) |
| nxt++; |
| |
| if ((nxt - cur >= 5) && (nxt - cur <= 8)) |
| goto variant; |
| if (nxt - cur != 2) |
| return(0); |
| /* we parsed a region */ |
| region: |
| if (nxt[0] == 0) |
| return(1); |
| if (nxt[0] != '-') |
| return(0); |
| |
| nxt++; |
| cur = nxt; |
| /* now we can just have a variant */ |
| while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || |
| ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) |
| nxt++; |
| |
| if ((nxt - cur < 5) || (nxt - cur > 8)) |
| return(0); |
| |
| /* we parsed a variant */ |
| variant: |
| if (nxt[0] == 0) |
| return(1); |
| if (nxt[0] != '-') |
| return(0); |
| /* extensions and private use subtags not checked */ |
| return (1); |
| |
| region_m49: |
| if (((nxt[1] >= '0') && (nxt[1] <= '9')) && |
| ((nxt[2] >= '0') && (nxt[2] <= '9'))) { |
| nxt += 3; |
| goto region; |
| } |
| return(0); |
| } |
| |
| /************************************************************************ |
| * * |
| * Parser stacks related functions and macros * |
| * * |
| ************************************************************************/ |
| |
| static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, |
| const xmlChar ** str); |
| |
| #ifdef SAX2 |
| /** |
| * nsPush: |
| * @ctxt: an XML parser context |
| * @prefix: the namespace prefix or NULL |
| * @URL: the namespace name |
| * |
| * Pushes a new parser namespace on top of the ns stack |
| * |
| * Returns -1 in case of error, -2 if the namespace should be discarded |
| * and the index in the stack otherwise. |
| */ |
| static int |
| nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) |
| { |
| if (ctxt->options & XML_PARSE_NSCLEAN) { |
| int i; |
| for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { |
| if (ctxt->nsTab[i] == prefix) { |
| /* in scope */ |
| if (ctxt->nsTab[i + 1] == URL) |
| return(-2); |
| /* out of scope keep it */ |
| break; |
| } |
| } |
| } |
| if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { |
| ctxt->nsMax = 10; |
| ctxt->nsNr = 0; |
| ctxt->nsTab = (const xmlChar **) |
| xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); |
| if (ctxt->nsTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->nsMax = 0; |
| return (-1); |
| } |
| } else if (ctxt->nsNr >= ctxt->nsMax) { |
| const xmlChar ** tmp; |
| ctxt->nsMax *= 2; |
| tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, |
| ctxt->nsMax * sizeof(ctxt->nsTab[0])); |
| if (tmp == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->nsMax /= 2; |
| return (-1); |
| } |
| ctxt->nsTab = tmp; |
| } |
| ctxt->nsTab[ctxt->nsNr++] = prefix; |
| ctxt->nsTab[ctxt->nsNr++] = URL; |
| return (ctxt->nsNr); |
| } |
| /** |
| * nsPop: |
| * @ctxt: an XML parser context |
| * @nr: the number to pop |
| * |
| * Pops the top @nr parser prefix/namespace from the ns stack |
| * |
| * Returns the number of namespaces removed |
| */ |
| static int |
| nsPop(xmlParserCtxtPtr ctxt, int nr) |
| { |
| int i; |
| |
| if (ctxt->nsTab == NULL) return(0); |
| if (ctxt->nsNr < nr) { |
| xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); |
| nr = ctxt->nsNr; |
| } |
| if (ctxt->nsNr <= 0) |
| return (0); |
| |
| for (i = 0;i < nr;i++) { |
| ctxt->nsNr--; |
| ctxt->nsTab[ctxt->nsNr] = NULL; |
| } |
| return(nr); |
| } |
| #endif |
| |
| static int |
| xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { |
| const xmlChar **atts; |
| int *attallocs; |
| int maxatts; |
| |
| if (ctxt->atts == NULL) { |
| maxatts = 55; /* allow for 10 attrs by default */ |
| atts = (const xmlChar **) |
| xmlMalloc(maxatts * sizeof(xmlChar *)); |
| if (atts == NULL) goto mem_error; |
| ctxt->atts = atts; |
| attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); |
| if (attallocs == NULL) goto mem_error; |
| ctxt->attallocs = attallocs; |
| ctxt->maxatts = maxatts; |
| } else if (nr + 5 > ctxt->maxatts) { |
| maxatts = (nr + 5) * 2; |
| atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, |
| maxatts * sizeof(const xmlChar *)); |
| if (atts == NULL) goto mem_error; |
| ctxt->atts = atts; |
| attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, |
| (maxatts / 5) * sizeof(int)); |
| if (attallocs == NULL) goto mem_error; |
| ctxt->attallocs = attallocs; |
| ctxt->maxatts = maxatts; |
| } |
| return(ctxt->maxatts); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return(-1); |
| } |
| |
| /** |
| * inputPush: |
| * @ctxt: an XML parser context |
| * @value: the parser input |
| * |
| * Pushes a new parser input on top of the input stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| int |
| inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) |
| { |
| if ((ctxt == NULL) || (value == NULL)) |
| return(-1); |
| if (ctxt->inputNr >= ctxt->inputMax) { |
| ctxt->inputMax *= 2; |
| ctxt->inputTab = |
| (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, |
| ctxt->inputMax * |
| sizeof(ctxt->inputTab[0])); |
| if (ctxt->inputTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| xmlFreeInputStream(value); |
| ctxt->inputMax /= 2; |
| value = NULL; |
| return (-1); |
| } |
| } |
| ctxt->inputTab[ctxt->inputNr] = value; |
| ctxt->input = value; |
| return (ctxt->inputNr++); |
| } |
| /** |
| * inputPop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top parser input from the input stack |
| * |
| * Returns the input just removed |
| */ |
| xmlParserInputPtr |
| inputPop(xmlParserCtxtPtr ctxt) |
| { |
| xmlParserInputPtr ret; |
| |
| if (ctxt == NULL) |
| return(NULL); |
| if (ctxt->inputNr <= 0) |
| return (NULL); |
| ctxt->inputNr--; |
| if (ctxt->inputNr > 0) |
| ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; |
| else |
| ctxt->input = NULL; |
| ret = ctxt->inputTab[ctxt->inputNr]; |
| ctxt->inputTab[ctxt->inputNr] = NULL; |
| return (ret); |
| } |
| /** |
| * nodePush: |
| * @ctxt: an XML parser context |
| * @value: the element node |
| * |
| * Pushes a new element node on top of the node stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| int |
| nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) |
| { |
| if (ctxt == NULL) return(0); |
| if (ctxt->nodeNr >= ctxt->nodeMax) { |
| xmlNodePtr *tmp; |
| |
| tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, |
| ctxt->nodeMax * 2 * |
| sizeof(ctxt->nodeTab[0])); |
| if (tmp == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return (-1); |
| } |
| ctxt->nodeTab = tmp; |
| ctxt->nodeMax *= 2; |
| } |
| if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && |
| ((ctxt->options & XML_PARSE_HUGE) == 0)) { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
| "Excessive depth in document: %d use XML_PARSE_HUGE option\n", |
| xmlParserMaxDepth); |
| xmlHaltParser(ctxt); |
| return(-1); |
| } |
| ctxt->nodeTab[ctxt->nodeNr] = value; |
| ctxt->node = value; |
| return (ctxt->nodeNr++); |
| } |
| |
| /** |
| * nodePop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element node from the node stack |
| * |
| * Returns the node just removed |
| */ |
| xmlNodePtr |
| nodePop(xmlParserCtxtPtr ctxt) |
| { |
| xmlNodePtr ret; |
| |
| if (ctxt == NULL) return(NULL); |
| if (ctxt->nodeNr <= 0) |
| return (NULL); |
| ctxt->nodeNr--; |
| if (ctxt->nodeNr > 0) |
| ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; |
| else |
| ctxt->node = NULL; |
| ret = ctxt->nodeTab[ctxt->nodeNr]; |
| ctxt->nodeTab[ctxt->nodeNr] = NULL; |
| return (ret); |
| } |
| |
| #ifdef LIBXML_PUSH_ENABLED |
| /** |
| * nameNsPush: |
| * @ctxt: an XML parser context |
| * @value: the element name |
| * @prefix: the element prefix |
| * @URI: the element namespace name |
| * |
| * Pushes a new element name/prefix/URL on top of the name stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| static int |
| nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, |
| const xmlChar *prefix, const xmlChar *URI, int nsNr) |
| { |
| if (ctxt->nameNr >= ctxt->nameMax) { |
| const xmlChar * *tmp; |
| void **tmp2; |
| ctxt->nameMax *= 2; |
| tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
| ctxt->nameMax * |
| sizeof(ctxt->nameTab[0])); |
| if (tmp == NULL) { |
| ctxt->nameMax /= 2; |
| goto mem_error; |
| } |
| ctxt->nameTab = tmp; |
| tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, |
| ctxt->nameMax * 3 * |
| sizeof(ctxt->pushTab[0])); |
| if (tmp2 == NULL) { |
| ctxt->nameMax /= 2; |
| goto mem_error; |
| } |
| ctxt->pushTab = tmp2; |
| } |
| ctxt->nameTab[ctxt->nameNr] = value; |
| ctxt->name = value; |
| ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; |
| ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; |
| ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; |
| return (ctxt->nameNr++); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return (-1); |
| } |
| /** |
| * nameNsPop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element/prefix/URI name from the name stack |
| * |
| * Returns the name just removed |
| */ |
| static const xmlChar * |
| nameNsPop(xmlParserCtxtPtr ctxt) |
| { |
| const xmlChar *ret; |
| |
| if (ctxt->nameNr <= 0) |
| return (NULL); |
| ctxt->nameNr--; |
| if (ctxt->nameNr > 0) |
| ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
| else |
| ctxt->name = NULL; |
| ret = ctxt->nameTab[ctxt->nameNr]; |
| ctxt->nameTab[ctxt->nameNr] = NULL; |
| return (ret); |
| } |
| #endif /* LIBXML_PUSH_ENABLED */ |
| |
| /** |
| * namePush: |
| * @ctxt: an XML parser context |
| * @value: the element name |
| * |
| * Pushes a new element name on top of the name stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| int |
| namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) |
| { |
| if (ctxt == NULL) return (-1); |
| |
| if (ctxt->nameNr >= ctxt->nameMax) { |
| const xmlChar * *tmp; |
| tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
| ctxt->nameMax * 2 * |
| sizeof(ctxt->nameTab[0])); |
| if (tmp == NULL) { |
| goto mem_error; |
| } |
| ctxt->nameTab = tmp; |
| ctxt->nameMax *= 2; |
| } |
| ctxt->nameTab[ctxt->nameNr] = value; |
| ctxt->name = value; |
| return (ctxt->nameNr++); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return (-1); |
| } |
| /** |
| * namePop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element name from the name stack |
| * |
| * Returns the name just removed |
| */ |
| const xmlChar * |
| namePop(xmlParserCtxtPtr ctxt) |
| { |
| const xmlChar *ret; |
| |
| if ((ctxt == NULL) || (ctxt->nameNr <= 0)) |
| return (NULL); |
| ctxt->nameNr--; |
| if (ctxt->nameNr > 0) |
| ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
| else |
| ctxt->name = NULL; |
| ret = ctxt->nameTab[ctxt->nameNr]; |
| ctxt->nameTab[ctxt->nameNr] = NULL; |
| return (ret); |
| } |
| |
| static int spacePush(xmlParserCtxtPtr ctxt, int val) { |
| if (ctxt->spaceNr >= ctxt->spaceMax) { |
| int *tmp; |
| |
| ctxt->spaceMax *= 2; |
| tmp = (int *) xmlRealloc(ctxt->spaceTab, |
| ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); |
| if (tmp == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->spaceMax /=2; |
| return(-1); |
| } |
| ctxt->spaceTab = tmp; |
| } |
| ctxt->spaceTab[ctxt->spaceNr] = val; |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; |
| return(ctxt->spaceNr++); |
| } |
| |
| static int spacePop(xmlParserCtxtPtr ctxt) { |
| int ret; |
| if (ctxt->spaceNr <= 0) return(0); |
| ctxt->spaceNr--; |
| if (ctxt->spaceNr > 0) |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; |
| else |
| ctxt->space = &ctxt->spaceTab[0]; |
| ret = ctxt->spaceTab[ctxt->spaceNr]; |
| ctxt->spaceTab[ctxt->spaceNr] = -1; |
| return(ret); |
| } |
| |
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them:
 *
 *   CUR_PTR  returns the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      returns the current xmlChar value, i.e. an 8 bit value.
 *            This should be used internally by the parser only to compare
 *            to ASCII values, otherwise it would break when running with
 *            UTF-8 encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done. Both macros have the
 *            same definition here.
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR, it should be used
 *            only to compare against ASCII based substrings.
 *   SKIP(n)  skips n xmlChars, and must also be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   NEXT1    skips 1 xmlChar, and must also be used only to skip 1
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8 encoding:
 *
 *   NEXT     skips to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) skips the current unicode character which is l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), sets l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operates on a string instead of the context
 *   COPY_BUF copies the current unicode char to the target buffer and
 *            increments the index
 *   GROW, SHRINK handling of input buffers
 */

#define RAW		(*ctxt->input->cur)
#define CUR		(*ctxt->input->cur)
#define NXT(val)	ctxt->input->cur[(val)]
#define CUR_PTR		ctxt->input->cur
| |
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn. The bytes are read as unsigned char and compared from left
 * to right, the comparison stops at the first difference so bytes past a
 * mismatch are not read.
 * s may be any pointer expression: it is parenthesized before being
 * converted, so an offset such as p + 1 is applied to the original pointer
 * type. It is evaluated once per byte compared and therefore must not have
 * side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
| |
/*
 * SKIP(val): move the current position val xmlChars forward, the chars
 * counter and the column are advanced by the same amount and the line is
 * left alone. Then a PE reference is handled if the new current xmlChar
 * is '%', and the input is popped if it is 0 and no more data could be
 * obtained. val is expanded three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but going one xmlChar at a time so that the
 * line and column stay accurate when newlines are skipped. val is expanded
 * as is in the loop test, which is evaluated before every step.
 */
#define SKIPL(val) do {							\
    int skipl = 0;							\
    while (skipl<val) {							\
        if (*(ctxt->input->cur) != '\n') {				\
            ctxt->input->col++;						\
        } else {							\
            ctxt->input->line++; ctxt->input->col = 1;			\
        }								\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
        skipl++;							\
    }									\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
| |
| #define SHRINK if ((ctxt->progressive == 0) && \ |
| (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ |
| (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ |
| xmlSHRINK (ctxt); |
| |
| static void xmlSHRINK (xmlParserCtxtPtr ctxt) { |
| xmlParserInputShrink(ctxt->input); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| xmlPopInput(ctxt); |
| } |
| |
| #define GROW if ((ctxt->progressive == 0) && \ |
| (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ |
| xmlGROW (ctxt); |
| |
| static void xmlGROW (xmlParserCtxtPtr ctxt) { |
| unsigned long curEnd = ctxt->input->end - ctxt->input->cur; |
| unsigned long curBase = ctxt->input->cur - ctxt->input->base; |
| |
| if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || |
| (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && |
| ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && |
| ((ctxt->options & XML_PARSE_HUGE) == 0)) { |
| xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); |
| xmlHaltParser(ctxt); |
| return; |
| } |
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| if ((ctxt->input->cur > ctxt->input->end) || |
| (ctxt->input->cur < ctxt->input->base)) { |
| xmlHaltParser(ctxt); |
| xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); |
| return; |
| } |
| if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| xmlPopInput(ctxt); |
| } |
| |
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar which is not a newline and ask for more
 * input when the new current xmlChar is 0.
 * Expands to a brace block and not to a do/while statement.
 */
#define NEXT1 {								\
        ctxt->nbChars++;						\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character which is l xmlChars long,
 * keeping line and column up to date, then handle a PE reference if the
 * new current xmlChar is '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++; ctxt->input->col = 1;			\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context, resp. of the string s; l gets its size */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i
 * and advance i; a plain store is used when l is 1,
 * xmlCopyCharMultiByte() otherwise.
 * Expands to an unterminated if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if (!(l == 1)) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
| |
| /** |
| * xmlSkipBlankChars: |
| * @ctxt: the XML parser context |
| * |
| * skip all blanks character found at that point in the input streams. |
| * It pops up finished entities in the process if allowable at that point. |
| * |
| * Returns the number of space chars skipped |
| */ |
| |
| int |
| xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { |
| int res = 0; |
| |
| /* |
| * It's Okay to use CUR/NEXT here since all the blanks are on |
| * the ASCII range. |
| */ |
| if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { |
| const xmlChar *cur; |
| /* |
| * if we are in the document content, go really fast |
| */ |
| cur = ctxt->input->cur; |
| while (IS_BLANK_CH(*cur)) { |
| if (*cur == '\n') { |
| ctxt->input->line++; ctxt->input->col = 1; |
| } else { |
| ctxt->input->col++; |
| } |
| cur++; |
| res++; |
| if (*cur == 0) { |
| ctxt->input->cur = cur; |
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| cur = ctxt->input->cur; |
| } |
| } |
| ctxt->input->cur = cur; |
| } else { |
| int cur; |
| do { |
| cur = CUR; |
| while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */ |
| (ctxt->instate != XML_PARSER_EOF))) { |
| NEXT; |
| cur = CUR; |
| res++; |
| } |
| while ((cur == 0) && (ctxt->inputNr > 1) && |
| (ctxt->instate != XML_PARSER_COMMENT)) { |
| xmlPopInput(ctxt); |
| cur = CUR; |
| } |
| /* |
| * Need to handle support of entities branching here |
| */ |
| if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); |
| } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */ |
| (ctxt->instate != XML_PARSER_EOF)); |
| } |
| return(res); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions to handle entities * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlPopInput: |
| * @ctxt: an XML parser context |
| * |
| * xmlPopInput: the current input pointed by ctxt->input came to an end |
| * pop it and return the next char. |
| * |
| * Returns the current xmlChar in the parser context |
| */ |
| xmlChar |
| xmlPopInput(xmlParserCtxtPtr ctxt) { |
| if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "Popping input %d\n", ctxt->inputNr); |
| xmlFreeInputStream(inputPop(ctxt)); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| return(xmlPopInput(ctxt)); |
| return(CUR); |
| } |
| |
| /** |
| * xmlPushInput: |
| * @ctxt: an XML parser context |
| * @input: an XML parser input fragment (entity, XML fragment ...). |
| * |
| * xmlPushInput: switch to a new input stream which is stacked on top |
| * of the previous one(s). |
| * Returns -1 in case of error or the index in the input stack |
| */ |
| int |
| xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
| int ret; |
| if (input == NULL) return(-1); |
| |
| if (xmlParserDebugEntities) { |
| if ((ctxt->input != NULL) && (ctxt->input->filename)) |
| xmlGenericError(xmlGenericErrorContext, |
| "%s(%d): ", ctxt->input->filename, |
| ctxt->input->line); |
| xmlGenericError(xmlGenericErrorContext, |
| "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); |
| } |
| ret = inputPush(ctxt, input); |
| if (ctxt->instate == XML_PARSER_EOF) |
| return(-1); |
| GROW; |
| return(ret); |
| } |
| |
| /** |
| * xmlParseCharRef: |
| * @ctxt: an XML parser context |
| * |
| * parse Reference declarations |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error |
| */ |
| int |
| xmlParseCharRef(xmlParserCtxtPtr ctxt) { |
| unsigned int val = 0; |
| int count = 0; |
| unsigned int outofrange = 0; |
| |
| /* |
| * Using RAW/CUR/NEXT is okay since we are working on ASCII range here |
| */ |
| if ((RAW == '&') && (NXT(1) == '#') && |
| (NXT(2) == 'x')) { |
| SKIP(3); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if (count++ > 20) { |
| count = 0; |
| GROW; |
| if (ctxt->instate == XML_PARSER_EOF) |
| return(0); |
| } |
| if ((RAW >= '0') && (RAW <= '9')) |
| val = val * 16 + (CUR - '0'); |
| else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) |
| val = val * 16 + (CUR - 'a') + 10; |
| else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) |
| val = val * 16 + (CUR - 'A') + 10; |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| if (val > 0x10FFFF) |
| outofrange = val; |
| |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->input->col++; |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else if ((RAW == '&') && (NXT(1) == '#')) { |
| SKIP(2); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if (count++ > 20) { |
| count = 0; |
| GROW; |
| if (ctxt->instate == XML_PARSER_EOF) |
| return(0); |
| } |
| if ((RAW >= '0') && (RAW <= '9')) |
| val = val * 10 + (CUR - '0'); |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| if (val > 0x10FFFF) |
| outofrange = val; |
| |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->input->col++; |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
| } |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if ((IS_CHAR(val) && (outofrange == 0))) { |
| return(val); |
| } else { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
| "xmlParseCharRef: invalid xmlChar value %d\n", |
| val); |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlParseStringCharRef: |
| * @ctxt: an XML parser context |
| * @str: a pointer to an index in the string |
| * |
| * parse Reference declarations, variant parsing from a string rather |
| * than an an input flow. |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error, str will be |
| * updated to the current value of the index |
| */ |
| static int |
| xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
| const xmlChar *ptr; |
| xmlChar cur; |
| unsigned int val = 0; |
| unsigned int outofrange = 0; |
| |
| if ((str == NULL) || (*str == NULL)) return(0); |
| ptr = *str; |
| cur = *ptr; |
| if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { |
| ptr += 3; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loop */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 16 + (cur - '0'); |
| else if ((cur >= 'a') && (cur <= 'f')) |
| val = val * 16 + (cur - 'a') + 10; |
| else if ((cur >= 'A') && (cur <= 'F')) |
| val = val * 16 + (cur - 'A') + 10; |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| if (val > 0x10FFFF) |
| outofrange = val; |
| |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else if ((cur == '&') && (ptr[1] == '#')){ |
| ptr += 2; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loops */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 10 + (cur - '0'); |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| if (val > 0x10FFFF) |
| outofrange = val; |
| |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
| return(0); |
| } |
| *str = ptr; |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if ((IS_CHAR(val) && (outofrange == 0))) { |
| return(val); |
| } else { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
| "xmlParseStringCharRef: invalid xmlChar value %d\n", |
| val); |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlNewBlanksWrapperInputStream: |
| * @ctxt: an XML parser context |
| * @entity: an Entity pointer |
| * |
| * Create a new input stream for wrapping |
| * blanks around a PEReference |
| * |
| * Returns the new input stream or NULL |
| */ |
| |
| static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} |
| |
| static xmlParserInputPtr |
| xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { |
| xmlParserInputPtr input; |
| xmlChar *buffer; |
| size_t length; |
| if (entity == NULL) { |
| xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| "xmlNewBlanksWrapperInputStream entity\n"); |
| return(NULL); |
| } |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "new blanks wrapper for entity: %s\n", entity->name); |
| input = xmlNewInputStream(ctxt); |
| if (input == NULL) { |
| return(NULL); |
| } |
| length = xmlStrlen(entity->name) + 5; |
| buffer = xmlMallocAtomic(length); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| xmlFree(input); |
| return(NULL); |
| } |
| buffer [0] = ' '; |
| buffer [1] = '%'; |
| buffer [length-3] = ';'; |
| buffer [length-2] = ' '; |
| buffer [length-1] = 0; |
| memcpy(buffer + 2, entity->name, length - 5); |
| input->free = deallocblankswrapper; |
| input->base = buffer; |
| input->cur = buffer; |
| input->length = length; |
| input->end = &buffer[length]; |
| return(input); |
| } |
| |
| /** |
| * xmlParserHandlePEReference: |
| * @ctxt: the parser context |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * [ WFC: No Recursion ] |
| * A parsed entity must not contain a recursive |
| * reference to itself, either directly or indirectly. |
| * |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an internal DTD |
| * subset which contains no parameter entity references, or a document |
| * with "standalone='yes'", ... ... The declaration of a parameter |
| * entity must precede any reference to it... |
| * |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external parameter entities |
| * with "standalone='no'", ... ... The declaration of a parameter entity |
| * must precede any reference to it... |
| * |
| * [ WFC: In DTD ] |
| * Parameter-entity references may only appear in the DTD. |
| * NOTE: misleading but this is handled. |
| * |
| * A PEReference may have been detected in the current input stream |
| * the handling is done accordingly to |
| * http://www.w3.org/TR/REC-xml#entproc |
| * i.e. |
| * - Included in literal in entity values |
| * - Included as Parameter Entity reference within DTDs |
| */ |
| void |
| xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
| const xmlChar *name; |
| xmlEntityPtr entity = NULL; |
| xmlParserInputPtr input; |
| |
| if (RAW != '%') return; |
| switch(ctxt->instate) { |
| case XML_PARSER_CDATA_SECTION: |
| return; |
| case XML_PARSER_COMMENT: |
| return; |
| case XML_PARSER_START_TAG: |
| return; |
| case XML_PARSER_END_TAG: |
| return; |
| case XML_PARSER_EOF: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); |
| return; |
| case XML_PARSER_PROLOG: |
| case XML_PARSER_START: |
| case XML_PARSER_MISC: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); |
| return; |
| case XML_PARSER_ENTITY_DECL: |
| case XML_PARSER_CONTENT: |
| case XML_PARSER_ATTRIBUTE_VALUE: |
| case XML_PARSER_PI: |
| case XML_PARSER_SYSTEM_LITERAL: |
| case XML_PARSER_PUBLIC_LITERAL: |
| /* we just ignore it there */ |
| return; |
| case XML_PARSER_EPILOG: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); |
| return; |
| case XML_PARSER_ENTITY_VALUE: |
| /* |
| * NOTE: in the case of entity values, we don't do the |
| * substitution here since we need the literal |
| * entity value to be able to save the internal |
| * subset of the document. |
| * This will be handled by xmlStringDecodeEntities |
| */ |
| return; |
| case XML_PARSER_DTD: |
| /* |
| * [WFC: Well-Formedness Constraint: PEs in Internal Subset] |
| * In the internal DTD subset, parameter-entity references |
| * can occur only where markup declarations can occur, not |
| * within markup declarations. |
| * In that case this is handled in xmlParseMarkupDecl |
| */ |
| if ((ctxt->external == 0) && (ctxt->inputNr == 1)) |
| return; |
| if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) |
| return; |
| break; |
| case XML_PARSER_IGNORE: |
| return; |
| } |
| |
| NEXT; |
| name = xmlParseName(ctxt); |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "PEReference: %s\n", name); |
| if (name == NULL) { |
| xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); |
| } else { |
| if (RAW == ';') { |
| NEXT; |
| if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) |
| entity = ctxt->sax->getParameterEntity(ctxt->userData, name); |
| if (ctxt->instate == XML_PARSER_EOF) |
| return; |
| if (entity == NULL) { |
| |
| /* |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an |
| * internal DTD subset which contains no parameter entity |
| * references, or a document with "standalone='yes'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((ctxt->standalone == 1) || |
| ((ctxt->hasExternalSubset == 0) && |
| (ctxt->hasPErefs == 0))) { |
| xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", name); |
| } else { |
| /* |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external |
| * parameter entities with "standalone='no'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { |
| xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", |
| name, NULL); |
| } else |
| xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", |
| name, NULL); |
| ctxt->valid = 0; |
| } |
| xmlParserEntityCheck(ctxt, 0, NULL, 0); |
| } else if (ctxt->input->free != deallocblankswrapper) { |
| input = xmlNewBlanksWrapperInputStream(ctxt, entity); |
| if (xmlPushInput(ctxt, input) < 0) |
| return; |
| } else { |
| if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || |
| (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { |
| xmlChar start[4]; |
| xmlCharEncoding enc; |
| |
| /* |
| * Note: external parameter entities will not be loaded, it |
| * is not required for a non-validating parser, unless the |
| * option of validating, or substituting entities were |
| * given. Doing so is far more secure as the parser will |
| * only process data coming from the document entity by |
| * default. |
| */ |
| if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
| ((ctxt->options & XML_PARSE_NOENT) == 0) && |
| ((ctxt->options & XML_PARSE_DTDVALID) == 0) && |
| ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && |
| ((ctxt->options & XML_PARSE_DTDATTR) == 0) && |
| (ctxt->replaceEntities == 0) && |
| (ctxt->validate == 0)) |
| return; |
| |
| /* |
| * handle the extra spaces added before and after |
| * c.f. http://www.w3.org/TR/REC-xml#as-PE |
| * this is done independently. |
| */ |
| input = xmlNewEntityInputStream(ctxt, entity); |
| if (xmlPushInput(ctxt, input) < 0) |
| return; |
| |
| /* |
| * Get the 4 first bytes and decode the charset |
| * if enc != XML_CHAR_ENCODING_NONE |
| * plug some encoding conversion routines. |
| * Note that, since we may have some non-UTF8 |
| * encoding (like UTF16, bug 135229), the 'length' |
| * is not known, but we can calculate based upon |
| * the amount of data in the buffer. |
| */ |
| GROW |
| if (ctxt->instate == XML_PARSER_EOF) |
| return; |
| if ((ctxt->input->end - ctxt->input->cur)>=4) { |
| start[0] = RAW; |
| start[1] = NXT(1); |
| start[2] = NXT(2); |
| start[3] = NXT(3); |
| enc = xmlDetectCharEncoding(start, 4); |
| if (enc != XML_CHAR_ENCODING_NONE) { |
| xmlSwitchEncoding(ctxt, enc); |
| } |
| } |
| |
| if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
| (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && |
| (IS_BLANK_CH(NXT(5)))) { |
| xmlParseTextDecl(ctxt); |
| } |
| } else { |
| xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
| "PEReference: %s is not a parameter entity\n", |
| name); |
| } |
| } |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); |
| } |
| } |
| } |
| |
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus @n. The macro jumps to
 * mem_error when that computation wraps around or when the reallocation
 * fails; in both cases buffer and buffer##_size are left untouched.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
| |
| /** |
| * xmlStringLenDecodeEntities: |
| * @ctxt: the parser context |
| * @str: the input string |
| * @len: the string length |
| * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| * @end: an end marker xmlChar, 0 if none |
| * @end2: an end marker xmlChar, 0 if none |
| * @end3: an end marker xmlChar, 0 if none |
| * |
| * Takes a entity string content and process to do the adequate substitutions. |
| * |
| * [67] Reference ::= EntityRef | CharRef |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * Returns A newly allocated string with the substitution done. The caller |
| * must deallocate it ! |
| */ |
| xmlChar * |
| xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
| int what, xmlChar end, xmlChar end2, xmlChar end3) { |
| xmlChar *buffer = NULL; |
| size_t buffer_size = 0; |
| size_t nbchars = 0; |
| |
| xmlChar *current = NULL; |
| xmlChar *rep = NULL; |
| const xmlChar *last; |
| xmlEntityPtr ent; |
| int c,l; |
| |
| if ((ctxt == NULL) || (str == NULL) || (len < 0)) |
| return(NULL); |
| last = str + len; |
| |
| if (((ctxt->depth > 40) && |
| ((ctxt->options & XML_PARSE_HUGE) == 0)) || |
| (ctxt->depth > 1024)) { |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
| return(NULL); |
| } |
| |
| /* |
| * allocate a translation buffer. |
| */ |
| buffer_size = XML_PARSER_BIG_BUFFER_SIZE; |
| buffer = (xmlChar *) xmlMallocAtomic(buffer_size); |
| if (buffer == NULL) goto mem_error; |
| |
| /* |
| * OK loop until we reach one of the ending char or a size limit. |
| * we are operating on already parsed values. |
| */ |
| if (str < last) |
| c = CUR_SCHAR(str, l); |
| else |
| c = 0; |
| while ((c != 0) && (c != end) && /* non input consuming loop */ |
| (c != end2) && (c != end3)) { |
| |
| if (c == 0) break; |
| if ((c == '&') && (str[1] == '#')) { |
| int val = xmlParseStringCharRef(ctxt, &str); |
| if (val != 0) { |
| COPY_BUF(0,buffer,nbchars,val); |
| } |
| if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
| } |
| } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding Entity Reference: %.30s\n", |
| str); |
| ent = xmlParseStringEntityRef(ctxt, &str); |
| if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || |
| (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) |
| goto int_error; |
| xmlParserEntityCheck(ctxt, 0, ent, 0); |
| if (ent != NULL) |
| ctxt->nbentities += ent->checked / 2; |
| if ((ent != NULL) && |
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
| if (ent->content != NULL) { |
| COPY_BUF(0,buffer,nbchars,ent->content[0]); |
| if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
| } |
| } else { |
| xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, |
| "predefined entity has no content\n"); |
| } |
| } else if ((ent != NULL) && (ent->content != NULL)) { |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| |
| if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || |
| (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) |
| goto int_error; |
| |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) |
| goto int_error; |
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
| } |
| } |
| xmlFree(rep); |
| rep = NULL; |
| } |
| } else if (ent != NULL) { |
| int i = xmlStrlen(ent->name); |
| const xmlChar *cur = ent->name; |
| |
| buffer[nbchars++] = '&'; |
| if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); |
| } |
| for (;i > 0;i--) |
| buffer[nbchars++] = *cur++; |
| buffer[nbchars++] = ';'; |
| } |
| } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding PE Reference: %.30s\n", str); |
| ent = xmlParseStringPEReference(ctxt, &str); |
| if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) |
| goto int_error; |
| xmlParserEntityCheck(ctxt, 0, ent, 0); |
| if (ent != NULL) |
| ctxt->nbentities += ent->checked / 2; |
| if (ent != NULL) { |
| if (ent->content == NULL) { |
| xmlLoadEntityContent(ctxt, ent); |
| } |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) |
| goto int_error; |
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
| } |
| } |
| xmlFree(rep); |
| rep = NULL; |
| } |
| } |
| } else { |
| COPY_BUF(l,buffer,nbchars,c); |
| str += l; |
| if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { |
| growBuffer(buffer, XML_PARSER_BUFFER_SIZE); |
| } |
| } |
| if (str < last) |
| c = CUR_SCHAR(str, l); |
| else |
| c = 0; |
| } |
| buffer[nbchars] = 0; |
| return(buffer); |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| int_error: |
| if (rep != NULL) |
| xmlFree(rep); |
| if (buffer != NULL) |
| xmlFree(buffer); |
| return(NULL); |
| } |
| |
| /** |
| * xmlStringDecodeEntities: |
| * @ctxt: the parser context |
| * @str: the input string |
| * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| * @end: an end marker xmlChar, 0 if none |
| * @end2: an end marker xmlChar, 0 if none |
| * @end3: an end marker xmlChar, 0 if none |
| * |
| * Takes a entity string content and process to do the adequate substitutions. |
| * |
| * [67] Reference ::= EntityRef | CharRef |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * Returns A newly allocated string with the substitution done. The caller |
| * must deallocate it ! |
| */ |
| xmlChar * |
| xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, |
| xmlChar end, xmlChar end2, xmlChar end3) { |
| if ((ctxt == NULL) || (str == NULL)) return(NULL); |
| return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, |
| end, end2, end3)); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions, cleanup needed ? * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * areBlanks: |
| * @ctxt: an XML parser context |
| * @str: a xmlChar * |
| * @len: the size of @str |
| * @blank_chars: we know the chars are blanks |
| * |
| * Is this a sequence of blank chars that one can ignore ? |
| * |
| * Returns 1 if ignorable 0 otherwise. |
| */ |
| |
| static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
| int blank_chars) { |
| int i, ret; |
| xmlNodePtr lastChild; |
| |
| /* |
| * Don't spend time trying to differentiate them, the same callback is |
| * used ! |
| */ |
| if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) |
| return(0); |
| |
| /* |
| * Check for xml:space value. |
| */ |
| if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || |
| (*(ctxt->space) == -2)) |
| return(0); |
| |
| /* |
| * Check that the string is made of blanks |
| */ |
| if (blank_chars == 0) { |
| for (i = 0;i < len;i++) |
| if (!(IS_BLANK_CH(str[i]))) return(0); |
| } |
| |
| /* |
| * Look if the element is mixed content in the DTD if available |
| */ |
| if (ctxt->node == NULL) return(0); |
| if (ctxt->myDoc != NULL) { |
| ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); |
| if (ret == 0) return(1); |
| if (ret == 1) return(0); |
| } |
| |
| /* |
| * Otherwise, heuristic :-\ |
| */ |
| if ((RAW != '<') && (RAW != 0xD)) return(0); |
| if ((ctxt->node->children == NULL) && |
| (RAW == '<') && (NXT(1) == '/')) return(0); |
| |
| lastChild = xmlGetLastChild(ctxt->node); |
| if (lastChild == NULL) { |
| if ((ctxt->node->type != XML_ELEMENT_NODE) && |
| (ctxt->node->content != NULL)) return(0); |
| } else if (xmlNodeIsText(lastChild)) |
| return(0); |
|