| /* |
| * parser.c : an XML 1.0 parser, namespaces and validity support are mostly |
| * implemented on top of the SAX interfaces |
| * |
| * References: |
| * The XML specification: |
| * http://www.w3.org/TR/REC-xml |
| * Original 1.0 version: |
| * http://www.w3.org/TR/1998/REC-xml-19980210 |
| * XML second edition working draft |
| * http://www.w3.org/TR/2000/WD-xml-2e-20000814 |
| * |
| * Okay this is a big file, the parser core is around 7000 lines, then it |
| * is followed by the progressive parser top routines, then the various |
| * high level APIs to call the parser and a few miscellaneous functions. |
| * A number of helper functions and deprecated ones have been moved to |
| * parserInternals.c to reduce this file size. |
| * As much as possible the functions are associated with their relative |
| * production in the XML specification. A few productions defining the |
| * different ranges of characters are actually implemented either in |
| * parserInternals.h or parserInternals.c |
| * The DOM tree build is realized from the default SAX callbacks in |
| * the module SAX.c. |
| * The routines doing the validation checks are in valid.c and called either |
| * from the SAX callbacks or as standalone functions using a preparsed |
| * document. |
| * |
| * See Copyright for the status of this software. |
| * |
| * daniel@veillard.com |
| */ |
| |
| #define IN_LIBXML |
| #include "libxml.h" |
| |
| #if defined(WIN32) && !defined (__CYGWIN__) |
| #define XML_DIR_SEP '\\' |
| #else |
| #define XML_DIR_SEP '/' |
| #endif |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <stdarg.h> |
| #include <libxml/xmlmemory.h> |
| #include <libxml/threads.h> |
| #include <libxml/globals.h> |
| #include <libxml/tree.h> |
| #include <libxml/parser.h> |
| #include <libxml/parserInternals.h> |
| #include <libxml/valid.h> |
| #include <libxml/entities.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/encoding.h> |
| #include <libxml/xmlIO.h> |
| #include <libxml/uri.h> |
| #ifdef LIBXML_CATALOG_ENABLED |
| #include <libxml/catalog.h> |
| #endif |
| |
| #ifdef HAVE_CTYPE_H |
| #include <ctype.h> |
| #endif |
| #ifdef HAVE_STDLIB_H |
| #include <stdlib.h> |
| #endif |
| #ifdef HAVE_SYS_STAT_H |
| #include <sys/stat.h> |
| #endif |
| #ifdef HAVE_FCNTL_H |
| #include <fcntl.h> |
| #endif |
| #ifdef HAVE_UNISTD_H |
| #include <unistd.h> |
| #endif |
| #ifdef HAVE_ZLIB_H |
| #include <zlib.h> |
| #endif |
| |
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 *
 * nodePush() compares the node stack depth against this value and, when
 * it is exceeded, raises a fatal error and switches the parser to the
 * XML_PARSER_EOF state.
 */
unsigned int xmlParserMaxDepth = 1024;

/*
 * When defined, the SAX2-only helpers guarded by "#ifdef SAX2" in this
 * file (the namespace stack routines nsPush/nsPop) are compiled in.
 */
#define SAX2 1

/*
 * NOTE(review): buffer size constants; the code using them is outside
 * the part of the file reviewed here - confirm their exact role there.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Marker string for the SAX compatibility mode.
 * NOTE(review): presumably stored on documents built in that mode;
 * verify against the users of this macro.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL-terminated array of target names)
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
| |
| |
/*
 * Forward declarations of routines used before their definition.
 * NOTE(review): the definitions are not part of the excerpt reviewed
 * here; presumably they live further down in this file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* File-local helper: parse an external entity (see its definition). */
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                              xmlSAXHandlerPtr sax,
                              void *user_data, int depth, const xmlChar *URL,
                              const xmlChar *ID, xmlNodePtr *list);

#ifdef LIBXML_LEGACY_ENABLED
/* Only compiled when legacy support is enabled. */
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

/* File-local helper: parse a balanced chunk from an in-memory string. */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);
| |
| /************************************************************************ |
| * * |
| * Some factorized error routines * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlErrAttributeDup: |
| * @ctxt: an XML parser context |
| * @prefix: the attribute prefix |
| * @localname: the attribute localname |
| * |
| * Handle a redefinition of attribute error |
| */ |
| static void |
| xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, |
| const xmlChar * localname) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; |
| if (prefix == NULL) |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
| ctxt->errNo, XML_ERR_FATAL, NULL, 0, |
| (const char *) localname, NULL, NULL, 0, 0, |
| "Attribute %s redefined\n", localname); |
| else |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
| ctxt->errNo, XML_ERR_FATAL, NULL, 0, |
| (const char *) prefix, (const char *) localname, |
| NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, |
| localname); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlFatalErr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @extra: extra information string |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) |
| { |
| const char *errmsg; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| switch (error) { |
| case XML_ERR_INVALID_HEX_CHARREF: |
| errmsg = "CharRef: invalid hexadecimal value\n"; |
| break; |
| case XML_ERR_INVALID_DEC_CHARREF: |
| errmsg = "CharRef: invalid decimal value\n"; |
| break; |
| case XML_ERR_INVALID_CHARREF: |
| errmsg = "CharRef: invalid value\n"; |
| break; |
| case XML_ERR_INTERNAL_ERROR: |
| errmsg = "internal error"; |
| break; |
| case XML_ERR_PEREF_AT_EOF: |
| errmsg = "PEReference at end of document\n"; |
| break; |
| case XML_ERR_PEREF_IN_PROLOG: |
| errmsg = "PEReference in prolog\n"; |
| break; |
| case XML_ERR_PEREF_IN_EPILOG: |
| errmsg = "PEReference in epilog\n"; |
| break; |
| case XML_ERR_PEREF_NO_NAME: |
| errmsg = "PEReference: no name\n"; |
| break; |
| case XML_ERR_PEREF_SEMICOL_MISSING: |
| errmsg = "PEReference: expecting ';'\n"; |
| break; |
| case XML_ERR_ENTITY_LOOP: |
| errmsg = "Detected an entity reference loop\n"; |
| break; |
| case XML_ERR_ENTITY_NOT_STARTED: |
| errmsg = "EntityValue: \" or ' expected\n"; |
| break; |
| case XML_ERR_ENTITY_PE_INTERNAL: |
| errmsg = "PEReferences forbidden in internal subset\n"; |
| break; |
| case XML_ERR_ENTITY_NOT_FINISHED: |
| errmsg = "EntityValue: \" or ' expected\n"; |
| break; |
| case XML_ERR_ATTRIBUTE_NOT_STARTED: |
| errmsg = "AttValue: \" or ' expected\n"; |
| break; |
| case XML_ERR_LT_IN_ATTRIBUTE: |
| errmsg = "Unescaped '<' not allowed in attributes values\n"; |
| break; |
| case XML_ERR_LITERAL_NOT_STARTED: |
| errmsg = "SystemLiteral \" or ' expected\n"; |
| break; |
| case XML_ERR_LITERAL_NOT_FINISHED: |
| errmsg = "Unfinished System or Public ID \" or ' expected\n"; |
| break; |
| case XML_ERR_MISPLACED_CDATA_END: |
| errmsg = "Sequence ']]>' not allowed in content\n"; |
| break; |
| case XML_ERR_URI_REQUIRED: |
| errmsg = "SYSTEM or PUBLIC, the URI is missing\n"; |
| break; |
| case XML_ERR_PUBID_REQUIRED: |
| errmsg = "PUBLIC, the Public Identifier is missing\n"; |
| break; |
| case XML_ERR_HYPHEN_IN_COMMENT: |
| errmsg = "Comment must not contain '--' (double-hyphen)\n"; |
| break; |
| case XML_ERR_PI_NOT_STARTED: |
| errmsg = "xmlParsePI : no target name\n"; |
| break; |
| case XML_ERR_RESERVED_XML_NAME: |
| errmsg = "Invalid PI name\n"; |
| break; |
| case XML_ERR_NOTATION_NOT_STARTED: |
| errmsg = "NOTATION: Name expected here\n"; |
| break; |
| case XML_ERR_NOTATION_NOT_FINISHED: |
| errmsg = "'>' required to close NOTATION declaration\n"; |
| break; |
| case XML_ERR_VALUE_REQUIRED: |
| errmsg = "Entity value required\n"; |
| break; |
| case XML_ERR_URI_FRAGMENT: |
| errmsg = "Fragment not allowed"; |
| break; |
| case XML_ERR_ATTLIST_NOT_STARTED: |
| errmsg = "'(' required to start ATTLIST enumeration\n"; |
| break; |
| case XML_ERR_NMTOKEN_REQUIRED: |
| errmsg = "NmToken expected in ATTLIST enumeration\n"; |
| break; |
| case XML_ERR_ATTLIST_NOT_FINISHED: |
| errmsg = "')' required to finish ATTLIST enumeration\n"; |
| break; |
| case XML_ERR_MIXED_NOT_STARTED: |
| errmsg = "MixedContentDecl : '|' or ')*' expected\n"; |
| break; |
| case XML_ERR_PCDATA_REQUIRED: |
| errmsg = "MixedContentDecl : '#PCDATA' expected\n"; |
| break; |
| case XML_ERR_ELEMCONTENT_NOT_STARTED: |
| errmsg = "ContentDecl : Name or '(' expected\n"; |
| break; |
| case XML_ERR_ELEMCONTENT_NOT_FINISHED: |
| errmsg = "ContentDecl : ',' '|' or ')' expected\n"; |
| break; |
| case XML_ERR_PEREF_IN_INT_SUBSET: |
| errmsg = |
| "PEReference: forbidden within markup decl in internal subset\n"; |
| break; |
| case XML_ERR_GT_REQUIRED: |
| errmsg = "expected '>'\n"; |
| break; |
| case XML_ERR_CONDSEC_INVALID: |
| errmsg = "XML conditional section '[' expected\n"; |
| break; |
| case XML_ERR_EXT_SUBSET_NOT_FINISHED: |
| errmsg = "Content error in the external subset\n"; |
| break; |
| case XML_ERR_CONDSEC_INVALID_KEYWORD: |
| errmsg = |
| "conditional section INCLUDE or IGNORE keyword expected\n"; |
| break; |
| case XML_ERR_CONDSEC_NOT_FINISHED: |
| errmsg = "XML conditional section not closed\n"; |
| break; |
| case XML_ERR_XMLDECL_NOT_STARTED: |
| errmsg = "Text declaration '<?xml' required\n"; |
| break; |
| case XML_ERR_XMLDECL_NOT_FINISHED: |
| errmsg = "parsing XML declaration: '?>' expected\n"; |
| break; |
| case XML_ERR_EXT_ENTITY_STANDALONE: |
| errmsg = "external parsed entities cannot be standalone\n"; |
| break; |
| case XML_ERR_ENTITYREF_SEMICOL_MISSING: |
| errmsg = "EntityRef: expecting ';'\n"; |
| break; |
| case XML_ERR_DOCTYPE_NOT_FINISHED: |
| errmsg = "DOCTYPE improperly terminated\n"; |
| break; |
| case XML_ERR_LTSLASH_REQUIRED: |
| errmsg = "EndTag: '</' not found\n"; |
| break; |
| case XML_ERR_EQUAL_REQUIRED: |
| errmsg = "expected '='\n"; |
| break; |
| case XML_ERR_STRING_NOT_CLOSED: |
| errmsg = "String not closed expecting \" or '\n"; |
| break; |
| case XML_ERR_STRING_NOT_STARTED: |
| errmsg = "String not started expecting ' or \"\n"; |
| break; |
| case XML_ERR_ENCODING_NAME: |
| errmsg = "Invalid XML encoding name\n"; |
| break; |
| case XML_ERR_STANDALONE_VALUE: |
| errmsg = "standalone accepts only 'yes' or 'no'\n"; |
| break; |
| case XML_ERR_DOCUMENT_EMPTY: |
| errmsg = "Document is empty\n"; |
| break; |
| case XML_ERR_DOCUMENT_END: |
| errmsg = "Extra content at the end of the document\n"; |
| break; |
| case XML_ERR_NOT_WELL_BALANCED: |
| errmsg = "chunk is not well balanced\n"; |
| break; |
| case XML_ERR_EXTRA_CONTENT: |
| errmsg = "extra content at the end of well balanced chunk\n"; |
| break; |
| case XML_ERR_VERSION_MISSING: |
| errmsg = "Malformed declaration expecting version\n"; |
| break; |
| #if 0 |
| case: |
| errmsg = "\n"; |
| break; |
| #endif |
| default: |
| errmsg = "Unregistered error message\n"; |
| } |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg, |
| info); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlFatalErrMsg: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlWarningMsg: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: extra data |
| * @str2: extra data |
| * |
| * Handle a warning. |
| */ |
| static void |
| xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1, const xmlChar *str2) |
| { |
| xmlStructuredErrorFunc schannel = NULL; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
| schannel = ctxt->sax->serror; |
| __xmlRaiseError(schannel, |
| (ctxt->sax) ? ctxt->sax->warning : NULL, |
| ctxt->userData, |
| ctxt, NULL, XML_FROM_PARSER, error, |
| XML_ERR_WARNING, NULL, 0, |
| (const char *) str1, (const char *) str2, NULL, 0, 0, |
| msg, (const char *) str1, (const char *) str2); |
| } |
| |
| /** |
| * xmlValidityError: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: extra data |
| * |
| * Handle a validity error. |
| */ |
| static void |
| xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1) |
| { |
| xmlStructuredErrorFunc schannel = NULL; |
| |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
| schannel = ctxt->sax->serror; |
| __xmlRaiseError(schannel, |
| ctxt->vctxt.error, ctxt->vctxt.userData, |
| ctxt, NULL, XML_FROM_DTD, error, |
| XML_ERR_ERROR, NULL, 0, (const char *) str1, |
| NULL, NULL, 0, 0, |
| msg, (const char *) str1); |
| ctxt->valid = 0; |
| } |
| |
| /** |
| * xmlFatalErrMsgInt: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: an integer value |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, int val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, |
| ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, NULL, NULL, NULL, val, 0, msg, val); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlFatalErrMsgStrIntStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @str1: an string info |
| * @val: an integer value |
| * @str2: an string info |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar *str1, int val, |
| const xmlChar *str2) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, |
| ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, (const char *) str1, (const char *) str2, |
| NULL, val, 0, msg, str1, val, str2); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlFatalErrMsgStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: a string value |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar * val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
| XML_FROM_PARSER, error, XML_ERR_FATAL, |
| NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
| val); |
| ctxt->wellFormed = 0; |
| if (ctxt->recovery == 0) |
| ctxt->disableSAX = 1; |
| } |
| |
| /** |
| * xmlErrMsgStr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the error message |
| * @val: a string value |
| * |
| * Handle a non fatal parser error |
| */ |
| static void |
| xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, const xmlChar * val) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
| XML_FROM_PARSER, error, XML_ERR_ERROR, |
| NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
| val); |
| } |
| |
| /** |
| * xmlNsErr: |
| * @ctxt: an XML parser context |
| * @error: the error number |
| * @msg: the message |
| * @info1: extra information string |
| * @info2: extra information string |
| * |
| * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
| */ |
| static void |
| xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
| const char *msg, |
| const xmlChar * info1, const xmlChar * info2, |
| const xmlChar * info3) |
| { |
| if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
| (ctxt->instate == XML_PARSER_EOF)) |
| return; |
| ctxt->errNo = error; |
| __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
| XML_ERR_ERROR, NULL, 0, (const char *) info1, |
| (const char *) info2, (const char *) info3, 0, 0, msg, |
| info1, info2, info3); |
| ctxt->nsWellFormed = 0; |
| } |
| |
| /************************************************************************ |
| * * |
| * SAX2 defaulted attributes handling * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlDetectSAX2: |
| * @ctxt: an XML parser context |
| * |
| * Do the SAX2 detection and specific intialization |
| */ |
| static void |
| xmlDetectSAX2(xmlParserCtxtPtr ctxt) { |
| if (ctxt == NULL) return; |
| #ifdef LIBXML_SAX1_ENABLED |
| if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && |
| ((ctxt->sax->startElementNs != NULL) || |
| (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; |
| #else |
| ctxt->sax2 = 1; |
| #endif /* LIBXML_SAX1_ENABLED */ |
| |
| ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); |
| ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); |
| ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); |
| } |
| |
| typedef struct _xmlDefAttrs xmlDefAttrs; |
| typedef xmlDefAttrs *xmlDefAttrsPtr; |
| struct _xmlDefAttrs { |
| int nbAttrs; /* number of defaulted attributes on that element */ |
| int maxAttrs; /* the size of the array */ |
| const xmlChar *values[4]; /* array of localname/prefix/values */ |
| }; |
| |
| /** |
| * xmlAddDefAttrs: |
| * @ctxt: an XML parser context |
| * @fullname: the element fullname |
| * @fullattr: the attribute fullname |
| * @value: the attribute value |
| * |
| * Add a defaulted attribute for an element |
| */ |
| static void |
| xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
| const xmlChar *fullname, |
| const xmlChar *fullattr, |
| const xmlChar *value) { |
| xmlDefAttrsPtr defaults; |
| int len; |
| const xmlChar *name; |
| const xmlChar *prefix; |
| |
| if (ctxt->attsDefault == NULL) { |
| ctxt->attsDefault = xmlHashCreate(10); |
| if (ctxt->attsDefault == NULL) |
| goto mem_error; |
| } |
| |
| /* |
| * plit the element name into prefix:localname , the string found |
| * are within the DTD and hen not associated to namespace names. |
| */ |
| name = xmlSplitQName3(fullname, &len); |
| if (name == NULL) { |
| name = xmlDictLookup(ctxt->dict, fullname, -1); |
| prefix = NULL; |
| } else { |
| name = xmlDictLookup(ctxt->dict, name, -1); |
| prefix = xmlDictLookup(ctxt->dict, fullname, len); |
| } |
| |
| /* |
| * make sure there is some storage |
| */ |
| defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); |
| if (defaults == NULL) { |
| defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + |
| 12 * sizeof(const xmlChar *)); |
| if (defaults == NULL) |
| goto mem_error; |
| defaults->maxAttrs = 4; |
| defaults->nbAttrs = 0; |
| xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); |
| } else if (defaults->nbAttrs >= defaults->maxAttrs) { |
| defaults = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + |
| (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *)); |
| if (defaults == NULL) |
| goto mem_error; |
| defaults->maxAttrs *= 2; |
| xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL); |
| } |
| |
| /* |
| * plit the element name into prefix:localname , the string found |
| * are within the DTD and hen not associated to namespace names. |
| */ |
| name = xmlSplitQName3(fullattr, &len); |
| if (name == NULL) { |
| name = xmlDictLookup(ctxt->dict, fullattr, -1); |
| prefix = NULL; |
| } else { |
| name = xmlDictLookup(ctxt->dict, name, -1); |
| prefix = xmlDictLookup(ctxt->dict, fullattr, len); |
| } |
| |
| defaults->values[4 * defaults->nbAttrs] = name; |
| defaults->values[4 * defaults->nbAttrs + 1] = prefix; |
| /* intern the string and precompute the end */ |
| len = xmlStrlen(value); |
| value = xmlDictLookup(ctxt->dict, value, len); |
| defaults->values[4 * defaults->nbAttrs + 2] = value; |
| defaults->values[4 * defaults->nbAttrs + 3] = value + len; |
| defaults->nbAttrs++; |
| |
| return; |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return; |
| } |
| |
| /** |
| * xmlAddSpecialAttr: |
| * @ctxt: an XML parser context |
| * @fullname: the element fullname |
| * @fullattr: the attribute fullname |
| * @type: the attribute type |
| * |
| * Register that this attribute is not CDATA |
| */ |
| static void |
| xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, |
| const xmlChar *fullname, |
| const xmlChar *fullattr, |
| int type) |
| { |
| if (ctxt->attsSpecial == NULL) { |
| ctxt->attsSpecial = xmlHashCreate(10); |
| if (ctxt->attsSpecial == NULL) |
| goto mem_error; |
| } |
| |
| xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, |
| (void *) (long) type); |
| return; |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return; |
| } |
| |
/*
 * xmlLangIsAlpha:
 * @c:  a character value
 *
 * Returns 1 if @c is an ASCII letter ([a-z] | [A-Z]), 0 otherwise
 */
static int
xmlLangIsAlpha(int c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * A NULL or empty @lang is rejected. An IANA or user code must carry at
 * least one letter after the "i-" / "x-" introducer, as required by
 * productions [36] and [37].
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: introducer then one or more letters
         */
        cur += 2;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))
            cur++;
    } else if (xmlLangIsAlpha(cur[0]) && xmlLangIsAlpha(cur[1])) {
        /*
         * ISO639: exactly two letters
         */
        cur += 2;
    } else
        return (0);

    /*
     * Any number of '-' Subcode, each Subcode being one or more letters
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsAlpha(cur[0]))
            return (0);
        while (xmlLangIsAlpha(cur[0]))
            cur++;
    }
    return (1);
}
| |
| /************************************************************************ |
| * * |
| * Parser stacks related functions and macros * |
| * * |
| ************************************************************************/ |
| |
/*
 * Forward declaration.
 * NOTE(review): the definition is not part of the excerpt reviewed here;
 * presumably it parses an entity reference out of the string *str.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
| |
| #ifdef SAX2 |
| /** |
| * nsPush: |
| * @ctxt: an XML parser context |
| * @prefix: the namespace prefix or NULL |
| * @URL: the namespace name |
| * |
| * Pushes a new parser namespace on top of the ns stack |
| * |
| * Returns -1 in case of error, -2 if the namespace should be discarded |
| * and the index in the stack otherwise. |
| */ |
| static int |
| nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) |
| { |
| if (ctxt->options & XML_PARSE_NSCLEAN) { |
| int i; |
| for (i = 0;i < ctxt->nsNr;i += 2) { |
| if (ctxt->nsTab[i] == prefix) { |
| /* in scope */ |
| if (ctxt->nsTab[i + 1] == URL) |
| return(-2); |
| /* out of scope keep it */ |
| break; |
| } |
| } |
| } |
| if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { |
| ctxt->nsMax = 10; |
| ctxt->nsNr = 0; |
| ctxt->nsTab = (const xmlChar **) |
| xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); |
| if (ctxt->nsTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->nsMax = 0; |
| return (-1); |
| } |
| } else if (ctxt->nsNr >= ctxt->nsMax) { |
| ctxt->nsMax *= 2; |
| ctxt->nsTab = (const xmlChar **) |
| xmlRealloc((char *) ctxt->nsTab, |
| ctxt->nsMax * sizeof(ctxt->nsTab[0])); |
| if (ctxt->nsTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->nsMax /= 2; |
| return (-1); |
| } |
| } |
| ctxt->nsTab[ctxt->nsNr++] = prefix; |
| ctxt->nsTab[ctxt->nsNr++] = URL; |
| return (ctxt->nsNr); |
| } |
| /** |
| * nsPop: |
| * @ctxt: an XML parser context |
| * @nr: the number to pop |
| * |
| * Pops the top @nr parser prefix/namespace from the ns stack |
| * |
| * Returns the number of namespaces removed |
| */ |
| static int |
| nsPop(xmlParserCtxtPtr ctxt, int nr) |
| { |
| int i; |
| |
| if (ctxt->nsTab == NULL) return(0); |
| if (ctxt->nsNr < nr) { |
| xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); |
| nr = ctxt->nsNr; |
| } |
| if (ctxt->nsNr <= 0) |
| return (0); |
| |
| for (i = 0;i < nr;i++) { |
| ctxt->nsNr--; |
| ctxt->nsTab[ctxt->nsNr] = NULL; |
| } |
| return(nr); |
| } |
| #endif |
| |
| static int |
| xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { |
| const xmlChar **atts; |
| int *attallocs; |
| int maxatts; |
| |
| if (ctxt->atts == NULL) { |
| maxatts = 55; /* allow for 10 attrs by default */ |
| atts = (const xmlChar **) |
| xmlMalloc(maxatts * sizeof(xmlChar *)); |
| if (atts == NULL) goto mem_error; |
| ctxt->atts = atts; |
| attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); |
| if (attallocs == NULL) goto mem_error; |
| ctxt->attallocs = attallocs; |
| ctxt->maxatts = maxatts; |
| } else if (nr + 5 > ctxt->maxatts) { |
| maxatts = (nr + 5) * 2; |
| atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, |
| maxatts * sizeof(const xmlChar *)); |
| if (atts == NULL) goto mem_error; |
| ctxt->atts = atts; |
| attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, |
| (maxatts / 5) * sizeof(int)); |
| if (attallocs == NULL) goto mem_error; |
| ctxt->attallocs = attallocs; |
| ctxt->maxatts = maxatts; |
| } |
| return(ctxt->maxatts); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return(-1); |
| } |
| |
| /** |
| * inputPush: |
| * @ctxt: an XML parser context |
| * @value: the parser input |
| * |
| * Pushes a new parser input on top of the input stack |
| * |
| * Returns 0 in case of error, the index in the stack otherwise |
| */ |
| extern int |
| inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) |
| { |
| if (ctxt->inputNr >= ctxt->inputMax) { |
| ctxt->inputMax *= 2; |
| ctxt->inputTab = |
| (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, |
| ctxt->inputMax * |
| sizeof(ctxt->inputTab[0])); |
| if (ctxt->inputTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return (0); |
| } |
| } |
| ctxt->inputTab[ctxt->inputNr] = value; |
| ctxt->input = value; |
| return (ctxt->inputNr++); |
| } |
| /** |
| * inputPop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top parser input from the input stack |
| * |
| * Returns the input just removed |
| */ |
| extern xmlParserInputPtr |
| inputPop(xmlParserCtxtPtr ctxt) |
| { |
| xmlParserInputPtr ret; |
| |
| if (ctxt->inputNr <= 0) |
| return (0); |
| ctxt->inputNr--; |
| if (ctxt->inputNr > 0) |
| ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; |
| else |
| ctxt->input = NULL; |
| ret = ctxt->inputTab[ctxt->inputNr]; |
| ctxt->inputTab[ctxt->inputNr] = 0; |
| return (ret); |
| } |
| /** |
| * nodePush: |
| * @ctxt: an XML parser context |
| * @value: the element node |
| * |
| * Pushes a new element node on top of the node stack |
| * |
| * Returns 0 in case of error, the index in the stack otherwise |
| */ |
| extern int |
| nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) |
| { |
| if (ctxt->nodeNr >= ctxt->nodeMax) { |
| ctxt->nodeMax *= 2; |
| ctxt->nodeTab = |
| (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, |
| ctxt->nodeMax * |
| sizeof(ctxt->nodeTab[0])); |
| if (ctxt->nodeTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return (0); |
| } |
| } |
| if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
| "Excessive depth in document: change xmlParserMaxDepth = %d\n", |
| xmlParserMaxDepth); |
| ctxt->instate = XML_PARSER_EOF; |
| return(0); |
| } |
| ctxt->nodeTab[ctxt->nodeNr] = value; |
| ctxt->node = value; |
| return (ctxt->nodeNr++); |
| } |
| /** |
| * nodePop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element node from the node stack |
| * |
| * Returns the node just removed |
| */ |
| extern xmlNodePtr |
| nodePop(xmlParserCtxtPtr ctxt) |
| { |
| xmlNodePtr ret; |
| |
| if (ctxt->nodeNr <= 0) |
| return (0); |
| ctxt->nodeNr--; |
| if (ctxt->nodeNr > 0) |
| ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; |
| else |
| ctxt->node = NULL; |
| ret = ctxt->nodeTab[ctxt->nodeNr]; |
| ctxt->nodeTab[ctxt->nodeNr] = 0; |
| return (ret); |
| } |
| /** |
| * nameNsPush: |
| * @ctxt: an XML parser context |
| * @value: the element name |
| * @prefix: the element prefix |
| * @URI: the element namespace name |
| * |
| * Pushes a new element name/prefix/URL on top of the name stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| static int |
| nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, |
| const xmlChar *prefix, const xmlChar *URI, int nsNr) |
| { |
| if (ctxt->nameNr >= ctxt->nameMax) { |
| const xmlChar * *tmp; |
| void **tmp2; |
| ctxt->nameMax *= 2; |
| tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
| ctxt->nameMax * |
| sizeof(ctxt->nameTab[0])); |
| if (tmp == NULL) { |
| ctxt->nameMax /= 2; |
| goto mem_error; |
| } |
| ctxt->nameTab = tmp; |
| tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, |
| ctxt->nameMax * 3 * |
| sizeof(ctxt->pushTab[0])); |
| if (tmp2 == NULL) { |
| ctxt->nameMax /= 2; |
| goto mem_error; |
| } |
| ctxt->pushTab = tmp2; |
| } |
| ctxt->nameTab[ctxt->nameNr] = value; |
| ctxt->name = value; |
| ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; |
| ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; |
| ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; |
| return (ctxt->nameNr++); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return (-1); |
| } |
| /** |
| * nameNsPop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element/prefix/URI name from the name stack |
| * |
| * Returns the name just removed |
| */ |
| static const xmlChar * |
| nameNsPop(xmlParserCtxtPtr ctxt) |
| { |
| const xmlChar *ret; |
| |
| if (ctxt->nameNr <= 0) |
| return (0); |
| ctxt->nameNr--; |
| if (ctxt->nameNr > 0) |
| ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
| else |
| ctxt->name = NULL; |
| ret = ctxt->nameTab[ctxt->nameNr]; |
| ctxt->nameTab[ctxt->nameNr] = NULL; |
| return (ret); |
| } |
| |
| /** |
| * namePush: |
| * @ctxt: an XML parser context |
| * @value: the element name |
| * |
| * Pushes a new element name on top of the name stack |
| * |
| * Returns -1 in case of error, the index in the stack otherwise |
| */ |
| extern int |
| namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) |
| { |
| if (ctxt->nameNr >= ctxt->nameMax) { |
| const xmlChar * *tmp; |
| ctxt->nameMax *= 2; |
| tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
| ctxt->nameMax * |
| sizeof(ctxt->nameTab[0])); |
| if (tmp == NULL) { |
| ctxt->nameMax /= 2; |
| goto mem_error; |
| } |
| ctxt->nameTab = tmp; |
| } |
| ctxt->nameTab[ctxt->nameNr] = value; |
| ctxt->name = value; |
| return (ctxt->nameNr++); |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return (-1); |
| } |
| /** |
| * namePop: |
| * @ctxt: an XML parser context |
| * |
| * Pops the top element name from the name stack |
| * |
| * Returns the name just removed |
| */ |
| extern const xmlChar * |
| namePop(xmlParserCtxtPtr ctxt) |
| { |
| const xmlChar *ret; |
| |
| if (ctxt->nameNr <= 0) |
| return (0); |
| ctxt->nameNr--; |
| if (ctxt->nameNr > 0) |
| ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
| else |
| ctxt->name = NULL; |
| ret = ctxt->nameTab[ctxt->nameNr]; |
| ctxt->nameTab[ctxt->nameNr] = 0; |
| return (ret); |
| } |
| |
| static int spacePush(xmlParserCtxtPtr ctxt, int val) { |
| if (ctxt->spaceNr >= ctxt->spaceMax) { |
| ctxt->spaceMax *= 2; |
| ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, |
| ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); |
| if (ctxt->spaceTab == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(0); |
| } |
| } |
| ctxt->spaceTab[ctxt->spaceNr] = val; |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; |
| return(ctxt->spaceNr++); |
| } |
| |
| static int spacePop(xmlParserCtxtPtr ctxt) { |
| int ret; |
| if (ctxt->spaceNr <= 0) return(0); |
| ctxt->spaceNr--; |
| if (ctxt->spaceNr > 0) |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; |
| else |
| ctxt->space = NULL; |
| ret = ctxt->spaceTab[ctxt->spaceNr]; |
| ctxt->spaceTab[ctxt->spaceNr] = -1; |
| return(ret); |
| } |
| |
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt' (the parser context) to be
 * in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both currently expand to
 *           the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
| |
/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at @s are
 * exactly c1 ... cn.  Bytes are compared from left to right and the
 * comparison stops at the first mismatch, so nothing past a mismatching
 * byte (e.g. a terminating 0) is read.
 *
 * @s is parenthesized so that any pointer expression can be passed, and
 * it is read through a const-qualified pointer so that const input can
 * be compared without casting the qualifier away.  @s is evaluated once
 * per compared byte: pass an expression without side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
| |
/*
 * SKIP(val): advance the current input by @val xmlChars, keeping the
 * character counter and the column in step, then give a parameter entity
 * reference a chance to be handled and pop the input if it is exhausted
 * and cannot be refilled.  @val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == 0) { \
        if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) \
            xmlPopInput(ctxt); \
    } \
  } while (0)
| |
| #define SHRINK if ((ctxt->progressive == 0) && \ |
| (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ |
| (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ |
| xmlSHRINK (ctxt); |
| |
| static void xmlSHRINK (xmlParserCtxtPtr ctxt) { |
| xmlParserInputShrink(ctxt->input); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| xmlPopInput(ctxt); |
| } |
| |
| #define GROW if ((ctxt->progressive == 0) && \ |
| (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ |
| xmlGROW (ctxt); |
| |
| static void xmlGROW (xmlParserCtxtPtr ctxt) { |
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| xmlPopInput(ctxt); |
| } |
| |
/* Skip the blanks found at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, keeping column and character
 * counter in step, and ask for more data when a 0 byte is reached.
 * Expands to a brace block, not to a single statement.
 */
#define NEXT1 { \
        ctxt->input->cur++; \
        ctxt->input->col++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): step over the current character, which is @l xmlChars long.
 * Line and column are updated from the first byte of that character,
 * then a parameter entity reference gets a chance to be handled.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } else { \
        ctxt->input->col++; \
    } \
    ctxt->input->cur += l; \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)

/* Current character of the input, resp. of string @s; @l receives its size */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character @v to buffer @b at index @i and
 * advance @i; a single byte store is used when @l is 1, the multi-byte
 * encoder otherwise.  Expands to an if/else without a trailing `;'.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyCharMultiByte(&b[i],v)
| |
| /** |
| * xmlSkipBlankChars: |
| * @ctxt: the XML parser context |
| * |
| * skip all blanks character found at that point in the input streams. |
| * It pops up finished entities in the process if allowable at that point. |
| * |
| * Returns the number of space chars skipped |
| */ |
| |
| int |
| xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { |
| int res = 0; |
| |
| /* |
| * It's Okay to use CUR/NEXT here since all the blanks are on |
| * the ASCII range. |
| */ |
| if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { |
| const xmlChar *cur; |
| /* |
| * if we are in the document content, go really fast |
| */ |
| cur = ctxt->input->cur; |
| while (IS_BLANK_CH(*cur)) { |
| if (*cur == '\n') { |
| ctxt->input->line++; ctxt->input->col = 1; |
| } |
| cur++; |
| res++; |
| if (*cur == 0) { |
| ctxt->input->cur = cur; |
| xmlParserInputGrow(ctxt->input, INPUT_CHUNK); |
| cur = ctxt->input->cur; |
| } |
| } |
| ctxt->input->cur = cur; |
| } else { |
| int cur; |
| do { |
| cur = CUR; |
| while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ |
| NEXT; |
| cur = CUR; |
| res++; |
| } |
| while ((cur == 0) && (ctxt->inputNr > 1) && |
| (ctxt->instate != XML_PARSER_COMMENT)) { |
| xmlPopInput(ctxt); |
| cur = CUR; |
| } |
| /* |
| * Need to handle support of entities branching here |
| */ |
| if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); |
| } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ |
| } |
| return(res); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions to handle entities * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlPopInput: |
| * @ctxt: an XML parser context |
| * |
| * xmlPopInput: the current input pointed by ctxt->input came to an end |
| * pop it and return the next char. |
| * |
| * Returns the current xmlChar in the parser context |
| */ |
| xmlChar |
| xmlPopInput(xmlParserCtxtPtr ctxt) { |
| if (ctxt->inputNr == 1) return(0); /* End of main Input */ |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "Popping input %d\n", ctxt->inputNr); |
| xmlFreeInputStream(inputPop(ctxt)); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| return(xmlPopInput(ctxt)); |
| return(CUR); |
| } |
| |
| /** |
| * xmlPushInput: |
| * @ctxt: an XML parser context |
| * @input: an XML parser input fragment (entity, XML fragment ...). |
| * |
| * xmlPushInput: switch to a new input stream which is stacked on top |
| * of the previous one(s). |
| */ |
| void |
| xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
| if (input == NULL) return; |
| |
| if (xmlParserDebugEntities) { |
| if ((ctxt->input != NULL) && (ctxt->input->filename)) |
| xmlGenericError(xmlGenericErrorContext, |
| "%s(%d): ", ctxt->input->filename, |
| ctxt->input->line); |
| xmlGenericError(xmlGenericErrorContext, |
| "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); |
| } |
| inputPush(ctxt, input); |
| GROW; |
| } |
| |
| /** |
| * xmlParseCharRef: |
| * @ctxt: an XML parser context |
| * |
| * parse Reference declarations |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error |
| */ |
| int |
| xmlParseCharRef(xmlParserCtxtPtr ctxt) { |
| unsigned int val = 0; |
| int count = 0; |
| |
| /* |
| * Using RAW/CUR/NEXT is okay since we are working on ASCII range here |
| */ |
| if ((RAW == '&') && (NXT(1) == '#') && |
| (NXT(2) == 'x')) { |
| SKIP(3); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if (count++ > 20) { |
| count = 0; |
| GROW; |
| } |
| if ((RAW >= '0') && (RAW <= '9')) |
| val = val * 16 + (CUR - '0'); |
| else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) |
| val = val * 16 + (CUR - 'a') + 10; |
| else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) |
| val = val * 16 + (CUR - 'A') + 10; |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->input->col++; |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else if ((RAW == '&') && (NXT(1) == '#')) { |
| SKIP(2); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if (count++ > 20) { |
| count = 0; |
| GROW; |
| } |
| if ((RAW >= '0') && (RAW <= '9')) |
| val = val * 10 + (CUR - '0'); |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->input->col++; |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
| } |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if (IS_CHAR(val)) { |
| return(val); |
| } else { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
| "xmlParseCharRef: invalid xmlChar value %d\n", |
| val); |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlParseStringCharRef: |
| * @ctxt: an XML parser context |
| * @str: a pointer to an index in the string |
| * |
| * parse Reference declarations, variant parsing from a string rather |
| * than an an input flow. |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error, str will be |
| * updated to the current value of the index |
| */ |
| static int |
| xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
| const xmlChar *ptr; |
| xmlChar cur; |
| int val = 0; |
| |
| if ((str == NULL) || (*str == NULL)) return(0); |
| ptr = *str; |
| cur = *ptr; |
| if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { |
| ptr += 3; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loop */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 16 + (cur - '0'); |
| else if ((cur >= 'a') && (cur <= 'f')) |
| val = val * 16 + (cur - 'a') + 10; |
| else if ((cur >= 'A') && (cur <= 'F')) |
| val = val * 16 + (cur - 'A') + 10; |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else if ((cur == '&') && (ptr[1] == '#')){ |
| ptr += 2; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loops */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 10 + (cur - '0'); |
| else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
| val = 0; |
| break; |
| } |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
| return(0); |
| } |
| *str = ptr; |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if (IS_CHAR(val)) { |
| return(val); |
| } else { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
| "xmlParseStringCharRef: invalid xmlChar value %d\n", |
| val); |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlNewBlanksWrapperInputStream: |
| * @ctxt: an XML parser context |
| * @entity: an Entity pointer |
| * |
| * Create a new input stream for wrapping |
| * blanks around a PEReference |
| * |
| * Returns the new input stream or NULL |
| */ |
| |
| static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} |
| |
| static xmlParserInputPtr |
| xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { |
| xmlParserInputPtr input; |
| xmlChar *buffer; |
| size_t length; |
| if (entity == NULL) { |
| xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, |
| "xmlNewBlanksWrapperInputStream entity\n"); |
| return(NULL); |
| } |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "new blanks wrapper for entity: %s\n", entity->name); |
| input = xmlNewInputStream(ctxt); |
| if (input == NULL) { |
| return(NULL); |
| } |
| length = xmlStrlen(entity->name) + 5; |
| buffer = xmlMallocAtomic(length); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| buffer [0] = ' '; |
| buffer [1] = '%'; |
| buffer [length-3] = ';'; |
| buffer [length-2] = ' '; |
| buffer [length-1] = 0; |
| memcpy(buffer + 2, entity->name, length - 5); |
| input->free = deallocblankswrapper; |
| input->base = buffer; |
| input->cur = buffer; |
| input->length = length; |
| input->end = &buffer[length]; |
| return(input); |
| } |
| |
| /** |
| * xmlParserHandlePEReference: |
| * @ctxt: the parser context |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * [ WFC: No Recursion ] |
| * A parsed entity must not contain a recursive |
| * reference to itself, either directly or indirectly. |
| * |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an internal DTD |
| * subset which contains no parameter entity references, or a document |
| * with "standalone='yes'", ... ... The declaration of a parameter |
| * entity must precede any reference to it... |
| * |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external parameter entities |
| * with "standalone='no'", ... ... The declaration of a parameter entity |
| * must precede any reference to it... |
| * |
| * [ WFC: In DTD ] |
| * Parameter-entity references may only appear in the DTD. |
| * NOTE: misleading but this is handled. |
| * |
| * A PEReference may have been detected in the current input stream |
| * the handling is done accordingly to |
| * http://www.w3.org/TR/REC-xml#entproc |
| * i.e. |
| * - Included in literal in entity values |
| * - Included as Parameter Entity reference within DTDs |
| */ |
| void |
| xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
| const xmlChar *name; |
| xmlEntityPtr entity = NULL; |
| xmlParserInputPtr input; |
| |
| if (RAW != '%') return; |
| switch(ctxt->instate) { |
| case XML_PARSER_CDATA_SECTION: |
| return; |
| case XML_PARSER_COMMENT: |
| return; |
| case XML_PARSER_START_TAG: |
| return; |
| case XML_PARSER_END_TAG: |
| return; |
| case XML_PARSER_EOF: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); |
| return; |
| case XML_PARSER_PROLOG: |
| case XML_PARSER_START: |
| case XML_PARSER_MISC: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); |
| return; |
| case XML_PARSER_ENTITY_DECL: |
| case XML_PARSER_CONTENT: |
| case XML_PARSER_ATTRIBUTE_VALUE: |
| case XML_PARSER_PI: |
| case XML_PARSER_SYSTEM_LITERAL: |
| case XML_PARSER_PUBLIC_LITERAL: |
| /* we just ignore it there */ |
| return; |
| case XML_PARSER_EPILOG: |
| xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); |
| return; |
| case XML_PARSER_ENTITY_VALUE: |
| /* |
| * NOTE: in the case of entity values, we don't do the |
| * substitution here since we need the literal |
| * entity value to be able to save the internal |
| * subset of the document. |
| * This will be handled by xmlStringDecodeEntities |
| */ |
| return; |
| case XML_PARSER_DTD: |
| /* |
| * [WFC: Well-Formedness Constraint: PEs in Internal Subset] |
| * In the internal DTD subset, parameter-entity references |
| * can occur only where markup declarations can occur, not |
| * within markup declarations. |
| * In that case this is handled in xmlParseMarkupDecl |
| */ |
| if ((ctxt->external == 0) && (ctxt->inputNr == 1)) |
| return; |
| if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) |
| return; |
| break; |
| case XML_PARSER_IGNORE: |
| return; |
| } |
| |
| NEXT; |
| name = xmlParseName(ctxt); |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "PEReference: %s\n", name); |
| if (name == NULL) { |
| xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); |
| } else { |
| if (RAW == ';') { |
| NEXT; |
| if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) |
| entity = ctxt->sax->getParameterEntity(ctxt->userData, name); |
| if (entity == NULL) { |
| |
| /* |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an |
| * internal DTD subset which contains no parameter entity |
| * references, or a document with "standalone='yes'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((ctxt->standalone == 1) || |
| ((ctxt->hasExternalSubset == 0) && |
| (ctxt->hasPErefs == 0))) { |
| xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", name); |
| } else { |
| /* |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external |
| * parameter entities with "standalone='no'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { |
| xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", |
| name); |
| } else |
| xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, |
| "PEReference: %%%s; not found\n", |
| name, NULL); |
| ctxt->valid = 0; |
| } |
| } else if (ctxt->input->free != deallocblankswrapper) { |
| input = xmlNewBlanksWrapperInputStream(ctxt, entity); |
| xmlPushInput(ctxt, input); |
| } else { |
| if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || |
| (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { |
| xmlChar start[4]; |
| xmlCharEncoding enc; |
| |
| /* |
| * handle the extra spaces added before and after |
| * c.f. http://www.w3.org/TR/REC-xml#as-PE |
| * this is done independently. |
| */ |
| input = xmlNewEntityInputStream(ctxt, entity); |
| xmlPushInput(ctxt, input); |
| |
| /* |
| * Get the 4 first bytes and decode the charset |
| * if enc != XML_CHAR_ENCODING_NONE |
| * plug some encoding conversion routines. |
| */ |
| GROW |
| if (entity->length >= 4) { |
| start[0] = RAW; |
| start[1] = NXT(1); |
| start[2] = NXT(2); |
| start[3] = NXT(3); |
| enc = xmlDetectCharEncoding(start, 4); |
| if (enc != XML_CHAR_ENCODING_NONE) { |
| xmlSwitchEncoding(ctxt, enc); |
| } |
| } |
| |
| if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
| (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && |
| (IS_BLANK_CH(NXT(5)))) { |
| xmlParseTextDecl(ctxt); |
| } |
| } else { |
| xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, |
| "PEReference: %s is not a parameter entity\n", |
| name); |
| } |
| } |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); |
| } |
| } |
| } |
| |
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of @buffer; expects an int named <buffer>_size
 * holding the current size and a `mem_error' label in the enclosing
 * function.  The result of the reallocation goes through a temporary:
 * when it fails, @buffer and <buffer>_size are left untouched, so the
 * error path can still release the old block instead of leaking it.
 */
#define growBuffer(buffer) { \
    xmlChar *growBuffer_tmp = (xmlChar *) \
        xmlRealloc(buffer, (buffer##_size * 2) * sizeof(xmlChar)); \
    if (growBuffer_tmp == NULL) goto mem_error; \
    buffer = growBuffer_tmp; \
    buffer##_size *= 2; \
}
| |
| /** |
| * xmlStringLenDecodeEntities: |
| * @ctxt: the parser context |
| * @str: the input string |
| * @len: the string length |
| * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| * @end: an end marker xmlChar, 0 if none |
| * @end2: an end marker xmlChar, 0 if none |
| * @end3: an end marker xmlChar, 0 if none |
| * |
| * Takes a entity string content and process to do the adequate substitutions. |
| * |
| * [67] Reference ::= EntityRef | CharRef |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * Returns A newly allocated string with the substitution done. The caller |
| * must deallocate it ! |
| */ |
| xmlChar * |
| xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
| int what, xmlChar end, xmlChar end2, xmlChar end3) { |
| xmlChar *buffer = NULL; |
| int buffer_size = 0; |
| |
| xmlChar *current = NULL; |
| const xmlChar *last; |
| xmlEntityPtr ent; |
| int c,l; |
| int nbchars = 0; |
| |
| if ((str == NULL) || (len < 0)) |
| return(NULL); |
| last = str + len; |
| |
| if (ctxt->depth > 40) { |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
| return(NULL); |
| } |
| |
| /* |
| * allocate a translation buffer. |
| */ |
| buffer_size = XML_PARSER_BIG_BUFFER_SIZE; |
| buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar)); |
| if (buffer == NULL) goto mem_error; |
| |
| /* |
| * OK loop until we reach one of the ending char or a size limit. |
| * we are operating on already parsed values. |
| */ |
| if (str < last) |
| c = CUR_SCHAR(str, l); |
| else |
| c = 0; |
| while ((c != 0) && (c != end) && /* non input consuming loop */ |
| (c != end2) && (c != end3)) { |
| |
| if (c == 0) break; |
| if ((c == '&') && (str[1] == '#')) { |
| int val = xmlParseStringCharRef(ctxt, &str); |
| if (val != 0) { |
| COPY_BUF(0,buffer,nbchars,val); |
| } |
| } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding Entity Reference: %.30s\n", |
| str); |
| ent = xmlParseStringEntityRef(ctxt, &str); |
| if ((ent != NULL) && |
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
| if (ent->content != NULL) { |
| COPY_BUF(0,buffer,nbchars,ent->content[0]); |
| } else { |
| xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, |
| "predefined entity has no content\n"); |
| } |
| } else if ((ent != NULL) && (ent->content != NULL)) { |
| xmlChar *rep; |
| |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars > |
| buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| xmlFree(rep); |
| } |
| } else if (ent != NULL) { |
| int i = xmlStrlen(ent->name); |
| const xmlChar *cur = ent->name; |
| |
| buffer[nbchars++] = '&'; |
| if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| for (;i > 0;i--) |
| buffer[nbchars++] = *cur++; |
| buffer[nbchars++] = ';'; |
| } |
| } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding PE Reference: %.30s\n", str); |
| ent = xmlParseStringPEReference(ctxt, &str); |
| if (ent != NULL) { |
| xmlChar *rep; |
| |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars > |
| buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| xmlFree(rep); |
| } |
| } |
| } else { |
| COPY_BUF(l,buffer,nbchars,c); |
| str += l; |
| if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| if (str < last) |
| c = CUR_SCHAR(str, l); |
| else |
| c = 0; |
| } |
| buffer[nbchars++] = 0; |
| return(buffer); |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| |
| /** |
| * xmlStringDecodeEntities: |
| * @ctxt: the parser context |
| * @str: the input string |
| * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| * @end: an end marker xmlChar, 0 if none |
| * @end2: an end marker xmlChar, 0 if none |
| * @end3: an end marker xmlChar, 0 if none |
| * |
| * Takes a entity string content and process to do the adequate substitutions. |
| * |
| * [67] Reference ::= EntityRef | CharRef |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * Returns A newly allocated string with the substitution done. The caller |
| * must deallocate it ! |
| */ |
| xmlChar * |
| xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, |
| xmlChar end, xmlChar end2, xmlChar end3) { |
| return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, |
| end, end2, end3)); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions, cleanup needed ? * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * areBlanks: |
| * @ctxt: an XML parser context |
| * @str: a xmlChar * |
| * @len: the size of @str |
| * @blank_chars: we know the chars are blanks |
| * |
| * Is this a sequence of blank chars that one can ignore ? |
| * |
| * Returns 1 if ignorable 0 otherwise. |
| */ |
| |
| static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
| int blank_chars) { |
| int i, ret; |
| xmlNodePtr lastChild; |
| |
| /* |
| * Don't spend time trying to differentiate them, the same callback is |
| * used ! |
| */ |
| if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) |
| return(0); |
| |
| /* |
| * Check for xml:space value. |
| */ |
| if (*(ctxt->space) == 1) |
| return(0); |
| |
| /* |
| * Check that the string is made of blanks |
| */ |
| if (blank_chars == 0) { |
| for (i = 0;i < len;i++) |
| if (!(IS_BLANK_CH(str[i]))) return(0); |
| } |
| |
| /* |
| * Look if the element is mixed content in the DTD if available |
| */ |
| if (ctxt->node == NULL) return(0); |
| if (ctxt->myDoc != NULL) { |
| ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); |
| if (ret == 0) return(1); |
| if (ret == 1) return(0); |
| } |
| |
| /* |
| * Otherwise, heuristic :-\ |
| */ |
| if (RAW != '<') return(0); |
| if ((ctxt->node->children == NULL) && |
| (RAW == '<') && (NXT(1) == '/')) return(0); |
| |
| lastChild = xmlGetLastChild(ctxt->node); |
| if (lastChild == NULL) { |
| if ((ctxt->node->type != XML_ELEMENT_NODE) && |
| (ctxt->node->content != NULL)) return(0); |
| } else if (xmlNodeIsText(lastChild)) |
| return(0); |
| else if ((ctxt->node->children != NULL) && |
| (xmlNodeIsText(ctxt->node->children))) |
| return(0); |
| return(1); |
| } |
| |
| /************************************************************************ |
| * * |
| * Extra stuff for namespace support * |
| * Relates to http://www.w3.org/TR/WD-xml-names * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlSplitQName: |
| * @ctxt: an XML parser context |
| * @name: an XML parser context |
| * @prefix: a xmlChar ** |
| * |
| * parse an UTF8 encoded XML qualified name string |
| * |
| * [NS 5] QName ::= (Prefix ':')? LocalPart |
| * |
| * [NS 6] Prefix ::= NCName |
| * |
| * [NS 7] LocalPart ::= NCName |
| * |
| * Returns the local part, and prefix is updated |
| * to get the Prefix if any. |
| */ |
| |
| xmlChar * |
| xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| xmlChar *buffer = NULL; |
| int len = 0; |
| int max = XML_MAX_NAMELEN; |
| xmlChar *ret = NULL; |
| const xmlChar *cur = name; |
| int c; |
| |
| *prefix = NULL; |
| |
| if (cur == NULL) return(NULL); |
| |
| #ifndef XML_XML_NAMESPACE |
| /* xml: prefix is not really a namespace */ |
| if ((cur[0] == 'x') && (cur[1] == 'm') && |
| (cur[2] == 'l') && (cur[3] == ':')) |
| return(xmlStrdup(name)); |
| #endif |
| |
| /* nasty but well=formed */ |
| if (cur[0] == ':') |
| return(xmlStrdup(name)); |
| |
| c = *cur++; |
| while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ |
| buf[len++] = c; |
| c = *cur++; |
| } |
| if (len >= max) { |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| max = len * 2; |
| |
| buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((c != 0) && (c != ':')) { /* tested bigname.xml */ |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| } |
| buffer[len++] = c; |
| c = *cur++; |
| } |
| buffer[len] = 0; |
| } |
| |
| /* nasty but well=formed |
| if ((c == ':') && (*cur == 0)) { |
| return(xmlStrdup(name)); |
| } */ |
| |
| if (buffer == NULL) |
| ret = xmlStrndup(buf, len); |
| else { |
| ret = buffer; |
| buffer = NULL; |
| max = XML_MAX_NAMELEN; |
| } |
| |
| |
| if (c == ':') { |
| c = *cur; |
| *prefix = ret; |
| if (c == 0) { |
| return(xmlStrndup(BAD_CAST "", 0)); |
| } |
| len = 0; |
| |
| /* |
| * Check that the first character is proper to start |
| * a new name |
| */ |
| if (!(((c >= 0x61) && (c <= 0x7A)) || |
| ((c >= 0x41) && (c <= 0x5A)) || |
| (c == '_') || (c == ':'))) { |
| int l; |
| int first = CUR_SCHAR(cur, l); |
| |
| if (!IS_LETTER(first) && (first != '_')) { |
| xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, |
| "Name %s is not XML Namespace compliant\n", |
| name); |
| } |
| } |
| cur++; |
| |
| while ((c != 0) && (len < max)) { /* tested bigname2.xml */ |
| buf[len++] = c; |
| c = *cur++; |
| } |
| if (len >= max) { |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| max = len * 2; |
| |
| buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while (c != 0) { /* tested bigname2.xml */ |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| } |
| buffer[len++] = c; |
| c = *cur++; |
| } |
| buffer[len] = 0; |
| } |
| |
| if (buffer == NULL) |
| ret = xmlStrndup(buf, len); |
| else { |
| ret = buffer; |
| } |
| } |
| |
| return(ret); |
| } |
| |
| /************************************************************************ |
| * * |
| * The parser itself * |
| * Relates to http://www.w3.org/TR/REC-xml * |
| * * |
| ************************************************************************/ |
| |
| static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); |
| static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, |
| int *len, int *alloc, int normalize); |
| |
| /** |
| * xmlParseName: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML name. |
| * |
| * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
| * CombiningChar | Extender |
| * |
| * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
| * |
| * [6] Names ::= Name (S Name)* |
| * |
| * Returns the Name parsed or NULL |
| */ |
| |
| const xmlChar * |
| xmlParseName(xmlParserCtxtPtr ctxt) { |
| const xmlChar *in; |
| const xmlChar *ret; |
| int count = 0; |
| |
| GROW; |
| |
| /* |
| * Accelerator for simple ASCII names |
| */ |
| in = ctxt->input->cur; |
| if (((*in >= 0x61) && (*in <= 0x7A)) || |
| ((*in >= 0x41) && (*in <= 0x5A)) || |
| (*in == '_') || (*in == ':')) { |
| in++; |
| while (((*in >= 0x61) && (*in <= 0x7A)) || |
| ((*in >= 0x41) && (*in <= 0x5A)) || |
| ((*in >= 0x30) && (*in <= 0x39)) || |
| (*in == '_') || (*in == '-') || |
| (*in == ':') || (*in == '.')) |
| in++; |
| if ((*in > 0) && (*in < 0x80)) { |
| count = in - ctxt->input->cur; |
| ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); |
| ctxt->input->cur = in; |
| ctxt->nbChars += count; |
| ctxt->input->col += count; |
| if (ret == NULL) |
| xmlErrMemory(ctxt, NULL); |
| return(ret); |
| } |
| } |
| return(xmlParseNameComplex(ctxt)); |
| } |
| |
| /** |
| * xmlParseNameAndCompare: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML name and compares for match |
| * (specialized for endtag parsing) |
| * |
| * Returns NULL for an illegal name, (xmlChar*) 1 for success |
| * and the name for mismatch |
| */ |
| |
| static const xmlChar * |
| xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { |
| register const xmlChar *cmp = other; |
| register const xmlChar *in; |
| const xmlChar *ret; |
| |
| GROW; |
| |
| in = ctxt->input->cur; |
| while (*in != 0 && *in == *cmp) { |
| ++in; |
| ++cmp; |
| } |
| if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { |
| /* success */ |
| ctxt->input->cur = in; |
| return (const xmlChar*) 1; |
| } |
| /* failure (or end of input buffer), check with full function */ |
| ret = xmlParseName (ctxt); |
| /* strings coming from the dictionnary direct compare possible */ |
| if (ret == other) { |
| return (const xmlChar*) 1; |
| } |
| return ret; |
| } |
| |
| static const xmlChar * |
| xmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
| int len = 0, l; |
| int c; |
| int count = 0; |
| |
| /* |
| * Handler for more complex cases |
| */ |
| GROW; |
| c = CUR_CHAR(l); |
| if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
| (!IS_LETTER(c) && (c != '_') && |
| (c != ':'))) { |
| return(NULL); |
| } |
| |
| while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
| ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c)))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| len += l; |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| } |
| return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
| } |
| |
| /** |
| * xmlParseStringName: |
| * @ctxt: an XML parser context |
| * @str: a pointer to the string pointer (IN/OUT) |
| * |
| * parse an XML name. |
| * |
| * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
| * CombiningChar | Extender |
| * |
| * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
| * |
| * [6] Names ::= Name (S Name)* |
| * |
| * Returns the Name parsed or NULL. The @str pointer |
| * is updated to the current location in the string. |
| */ |
| |
| static xmlChar * |
| xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| const xmlChar *cur = *str; |
| int len = 0, l; |
| int c; |
| |
| c = CUR_SCHAR(cur, l); |
| if (!IS_LETTER(c) && (c != '_') && |
| (c != ':')) { |
| return(NULL); |
| } |
| |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| COPY_BUF(l,buf,len,c); |
| cur += l; |
| c = CUR_SCHAR(cur, l); |
| if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| xmlChar *buffer; |
| int max = len * 2; |
| |
| buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
| /* test bigentname.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buffer,len,c); |
| cur += l; |
| c = CUR_SCHAR(cur, l); |
| } |
| buffer[len] = 0; |
| *str = cur; |
| return(buffer); |
| } |
| } |
| *str = cur; |
| return(xmlStrndup(buf, len)); |
| } |
| |
| /** |
| * xmlParseNmtoken: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML Nmtoken. |
| * |
| * [7] Nmtoken ::= (NameChar)+ |
| * |
| * [8] Nmtokens ::= Nmtoken (S Nmtoken)* |
| * |
| * Returns the Nmtoken parsed or NULL |
| */ |
| |
| xmlChar * |
| xmlParseNmtoken(xmlParserCtxtPtr ctxt) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| int len = 0, l; |
| int c; |
| int count = 0; |
| |
| GROW; |
| c = CUR_CHAR(l); |
| |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| COPY_BUF(l,buf,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| if (len >= XML_MAX_NAMELEN) { |
| /* |
| * Okay someone managed to make a huge token, so he's ready to pay |
| * for the processing speed. |
| */ |
| xmlChar *buffer; |
| int max = len * 2; |
| |
| buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buffer,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| } |
| buffer[len] = 0; |
| return(buffer); |
| } |
| } |
| if (len == 0) |
| return(NULL); |
| return(xmlStrndup(buf, len)); |
| } |
| |
| /** |
| * xmlParseEntityValue: |
| * @ctxt: an XML parser context |
| * @orig: if non-NULL store a copy of the original entity value |
| * |
| * parse a value for ENTITY declarations |
| * |
| * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | |
| * "'" ([^%&'] | PEReference | Reference)* "'" |
| * |
| * Returns the EntityValue parsed with reference substituted or NULL |
| */ |
| |
| xmlChar * |
| xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int c, l; |
| xmlChar stop; |
| xmlChar *ret = NULL; |
| const xmlChar *cur = NULL; |
| xmlParserInputPtr input; |
| |
| if (RAW == '"') stop = '"'; |
| else if (RAW == '\'') stop = '\''; |
| else { |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); |
| return(NULL); |
| } |
| buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| |
| /* |
| * The content of the entity definition is copied in a buffer. |
| */ |
| |
| ctxt->instate = XML_PARSER_ENTITY_VALUE; |
| input = ctxt->input; |
| GROW; |
| NEXT; |
| c = CUR_CHAR(l); |
| /* |
| * NOTE: 4.4.5 Included in Literal |
| * When a parameter entity reference appears in a literal entity |
| * value, ... a single or double quote character in the replacement |
| * text is always treated as a normal data character and will not |
| * terminate the literal. |
| * In practice it means we stop the loop only when back at parsing |
| * the initial entity and the quote is found |
| */ |
| while ((IS_CHAR(c)) && ((c != stop) || /* checked */ |
| (ctxt->input != input))) { |
| if (len + 5 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buf,len,c); |
| NEXTL(l); |
| /* |
| * Pop-up of finished entities. |
| */ |
| while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ |
| xmlPopInput(ctxt); |
| |
| GROW; |
| c = CUR_CHAR(l); |
| if (c == 0) { |
| GROW; |
| c = CUR_CHAR(l); |
| } |
| } |
| buf[len] = 0; |
| |
| /* |
| * Raise problem w.r.t. '&' and '%' being used in non-entities |
| * reference constructs. Note Charref will be handled in |
| * xmlStringDecodeEntities() |
| */ |
| cur = buf; |
| while (*cur != 0) { /* non input consuming */ |
| if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { |
| xmlChar *name; |
| xmlChar tmp = *cur; |
| |
| cur++; |
| name = xmlParseStringName(ctxt, &cur); |
| if ((name == NULL) || (*cur != ';')) { |
| xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, |
| "EntityValue: '%c' forbidden except for entities references\n", |
| tmp); |
| } |
| if ((tmp == '%') && (ctxt->inSubset == 1) && |
| (ctxt->inputNr == 1)) { |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); |
| } |
| if (name != NULL) |
| xmlFree(name); |
| if (*cur == 0) |
| break; |
| } |
| cur++; |
| } |
| |
| /* |
| * Then PEReference entities are substituted. |
| */ |
| if (c != stop) { |
| xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); |
| xmlFree(buf); |
| } else { |
| NEXT; |
| /* |
| * NOTE: 4.4.7 Bypassed |
| * When a general entity reference appears in the EntityValue in |
| * an entity declaration, it is bypassed and left as is. |
| * so XML_SUBSTITUTE_REF is not set here. |
| */ |
| ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, |
| 0, 0, 0); |
| if (orig != NULL) |
| *orig = buf; |
| else |
| xmlFree(buf); |
| } |
| |
| return(ret); |
| } |
| |
| /** |
| * xmlParseAttValueComplex: |
| * @ctxt: an XML parser context |
| * @len: the resulting attribute len |
| * @normalize: wether to apply the inner normalization |
| * |
| * parse a value for an attribute, this is the fallback function |
| * of xmlParseAttValue() when the attribute parsing requires handling |
| * of non-ASCII characters, or normalization compaction. |
| * |
| * Returns the AttValue parsed or NULL. The value has to be freed by the caller. |
| */ |
| static xmlChar * |
| xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
| xmlChar limit = 0; |
| xmlChar *buf = NULL; |
| int len = 0; |
| int buf_size = 0; |
| int c, l, in_space = 0; |
| xmlChar *current = NULL; |
| xmlEntityPtr ent; |
| |
| if (NXT(0) == '"') { |
| ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
| limit = '"'; |
| NEXT; |
| } else if (NXT(0) == '\'') { |
| limit = '\''; |
| ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
| NEXT; |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); |
| return(NULL); |
| } |
| |
| /* |
| * allocate a translation buffer. |
| */ |
| buf_size = XML_PARSER_BUFFER_SIZE; |
| buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar)); |
| if (buf == NULL) goto mem_error; |
| |
| /* |
| * OK loop until we reach one of the ending char or a size limit. |
| */ |
| c = CUR_CHAR(l); |
| while ((NXT(0) != limit) && /* checked */ |
| (c != '<')) { |
| if (c == 0) break; |
| if (c == '&') { |
| in_space = 0; |
| if (NXT(1) == '#') { |
| int val = xmlParseCharRef(ctxt); |
| |
| if (val == '&') { |
| if (ctxt->replaceEntities) { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| buf[len++] = '&'; |
| } else { |
| /* |
| * The reparsing will be done in xmlStringGetNodeList() |
| * called by the attribute() function in SAX.c |
| */ |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| buf[len++] = '&'; |
| buf[len++] = '#'; |
| buf[len++] = '3'; |
| buf[len++] = '8'; |
| buf[len++] = ';'; |
| } |
| } else { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| len += xmlCopyChar(0, &buf[len], val); |
| } |
| } else { |
| ent = xmlParseEntityRef(ctxt); |
| if ((ent != NULL) && |
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| if ((ctxt->replaceEntities == 0) && |
| (ent->content[0] == '&')) { |
| buf[len++] = '&'; |
| buf[len++] = '#'; |
| buf[len++] = '3'; |
| buf[len++] = '8'; |
| buf[len++] = ';'; |
| } else { |
| buf[len++] = ent->content[0]; |
| } |
| } else if ((ent != NULL) && |
| (ctxt->replaceEntities != 0)) { |
| xmlChar *rep; |
| |
| if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { |
| rep = xmlStringDecodeEntities(ctxt, ent->content, |
| XML_SUBSTITUTE_REF, |
| 0, 0, 0); |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming */ |
| buf[len++] = *current++; |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } |
| xmlFree(rep); |
| } |
| } else { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| if (ent->content != NULL) |
| buf[len++] = ent->content[0]; |
| } |
| } else if (ent != NULL) { |
| int i = xmlStrlen(ent->name); |
| const xmlChar *cur = ent->name; |
| |
| /* |
| * This may look absurd but is needed to detect |
| * entities problems |
| */ |
| if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
| (ent->content != NULL)) { |
| xmlChar *rep; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, |
| XML_SUBSTITUTE_REF, 0, 0, 0); |
| if (rep != NULL) |
| xmlFree(rep); |
| } |
| |
| /* |
| * Just output the reference |
| */ |
| buf[len++] = '&'; |
| if (len > buf_size - i - 10) { |
| growBuffer(buf); |
| } |
| for (;i > 0;i--) |
| buf[len++] = *cur++; |
| buf[len++] = ';'; |
| } |
| } |
| } else { |
| if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { |
| if ((len != 0) || (!normalize)) { |
| if ((!normalize) || (!in_space)) { |
| COPY_BUF(l,buf,len,0x20); |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } |
| in_space = 1; |
| } |
| } else { |
| in_space = 0; |
| COPY_BUF(l,buf,len,c); |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } |
| NEXTL(l); |
| } |
| GROW; |
| c = CUR_CHAR(l); |
| } |
| if ((in_space) && (normalize)) { |
| while (buf[len - 1] == 0x20) len--; |
| } |
| buf[len] = 0; |
| if (RAW == '<') { |
| xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); |
| } else if (RAW != limit) { |
| xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, |
| "AttValue: ' expected\n"); |
| } else |
| NEXT; |
| if (attlen != NULL) *attlen = len; |
| return(buf); |
| |
| mem_error: |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| |
| /** |
| * xmlParseAttValue: |
| * @ctxt: an XML parser context |
| * |
| * parse a value for an attribute |
| * Note: the parser won't do substitution of entities here, this |
| * will be handled later in xmlStringGetNodeList |
| * |
| * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | |
| * "'" ([^<&'] | Reference)* "'" |
| * |
| * 3.3.3 Attribute-Value Normalization: |
| * Before the value of an attribute is passed to the application or |
| * checked for validity, the XML processor must normalize it as follows: |
| * - a character reference is processed by appending the referenced |
| * character to the attribute value |
| * - an entity reference is processed by recursively processing the |
| * replacement text of the entity |
| * - a whitespace character (#x20, #xD, #xA, #x9) is processed by |
| * appending #x20 to the normalized value, except that only a single |
| * #x20 is appended for a "#xD#xA" sequence that is part of an external |
| * parsed entity or the literal entity value of an internal parsed entity |
| * - other characters are processed by appending them to the normalized value |
| * If the declared value is not CDATA, then the XML processor must further |
| * process the normalized attribute value by discarding any leading and |
| * trailing space (#x20) characters, and by replacing sequences of space |
| * (#x20) characters by a single space (#x20) character. |
| * All attributes for which no declaration has been read should be treated |
| * by a non-validating parser as if declared CDATA. |
| * |
| * Returns the AttValue parsed or NULL. The value has to be freed by the caller. |
| */ |
| |
| |
| xmlChar * |
| xmlParseAttValue(xmlParserCtxtPtr ctxt) { |
| return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); |
| } |
| |
| /** |
| * xmlParseSystemLiteral: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML Literal |
| * |
| * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
| * |
| * Returns the SystemLiteral parsed or NULL |
| */ |
| |
| xmlChar * |
| xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int cur, l; |
| xmlChar stop; |
| int state = ctxt->instate; |
| int count = 0; |
| |
| SHRINK; |
| if (RAW == '"') { |
| NEXT; |
| stop = '"'; |
| } else if (RAW == '\'') { |
| NEXT; |
| stop = '\''; |
| } else { |
| xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); |
| return(NULL); |
| } |
| |
| buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| return(NULL); |
| } |
| ctxt->instate = XML_PARSER_SYSTEM_LITERAL; |
| cur = CUR_CHAR(l); |
| while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ |
| if (len + 5 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlErrMemory(ctxt, NULL); |
| ctxt->instate = (xmlParserInputState) state; |
| return(NULL); |
| } |
| } |
| count++; |
| if (count > 50) { |
| GROW; |
| count <
|