| /* |
| * parser.c : an XML 1.0 parser, namespaces and validity support are mostly |
| * implemented on top of the SAX interfaces |
| * |
| * References: |
| * The XML specification: |
| * http://www.w3.org/TR/REC-xml |
| * Original 1.0 version: |
| * http://www.w3.org/TR/1998/REC-xml-19980210 |
| * XML second edition working draft |
| * http://www.w3.org/TR/2000/WD-xml-2e-20000814 |
| * |
| * Okay this is a big file, the parser core is around 7000 lines, then it |
| * is followed by the progressive parser top routines, then the various |
| * high level APIs to call the parser and a few miscellaneous functions. |
| * A number of helper functions and deprecated ones have been moved to |
| * parserInternals.c to reduce this file size. |
| * As much as possible the functions are associated with their relative |
| * production in the XML specification. A few productions defining the |
| * different ranges of characters are actually implemented either in |
| * parserInternals.h or parserInternals.c |
| * The DOM tree build is realized from the default SAX callbacks in |
| * the module SAX.c. |
| * The routines doing the validation checks are in valid.c and are called |
| * either from the SAX callbacks or as standalone functions using a |
| * preparsed document. |
| * |
| * See Copyright for the status of this software. |
| * |
| * Daniel.Veillard@w3.org |
| * |
| * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue |
| * and xmlDoValidityCheckingDefaultValue for VMS |
| */ |
| |
| #include "libxml.h" |
| |
| #ifdef WIN32 |
| #define XML_DIR_SEP '\\' |
| #else |
| #define XML_DIR_SEP '/' |
| #endif |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <libxml/xmlmemory.h> |
| #include <libxml/tree.h> |
| #include <libxml/parser.h> |
| #include <libxml/parserInternals.h> |
| #include <libxml/valid.h> |
| #include <libxml/entities.h> |
| #include <libxml/xmlerror.h> |
| #include <libxml/encoding.h> |
| #include <libxml/xmlIO.h> |
| #include <libxml/uri.h> |
| |
| #ifdef HAVE_CTYPE_H |
| #include <ctype.h> |
| #endif |
| #ifdef HAVE_STDLIB_H |
| #include <stdlib.h> |
| #endif |
| #ifdef HAVE_SYS_STAT_H |
| #include <sys/stat.h> |
| #endif |
| #ifdef HAVE_FCNTL_H |
| #include <fcntl.h> |
| #endif |
| #ifdef HAVE_UNISTD_H |
| #include <unistd.h> |
| #endif |
| #ifdef HAVE_ZLIB_H |
| #include <zlib.h> |
| #endif |
| |
| |
/*
 * Sizes used for the temporary buffers of the parser: the initial
 * allocation and the safety margin kept free at the end of a buffer.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * On VMS the two longest global names are truncated; the macros below
 * map the regular names onto the shortened symbols so that the
 * definitions which follow can be written only once.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs, NULL terminated.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
| |
| /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ |
| void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); |
| xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, |
| const xmlChar **str); |
| |
| |
| /************************************************************************ |
| * * |
| * Parser stacks related functions and macros * |
| * * |
| ************************************************************************/ |
| |
| xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, |
| const xmlChar ** str); |
| |
| /* |
| * Generic function for accessing stacks in the Parser Context |
| */ |
| |
| #define PUSH_AND_POP(scope, type, name) \ |
| scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \ |
| if (ctxt->name##Nr >= ctxt->name##Max) { \ |
| ctxt->name##Max *= 2; \ |
| ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \ |
| ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \ |
| if (ctxt->name##Tab == NULL) { \ |
| xmlGenericError(xmlGenericErrorContext, \ |
| "realloc failed !\n"); \ |
| return(0); \ |
| } \ |
| } \ |
| ctxt->name##Tab[ctxt->name##Nr] = value; \ |
| ctxt->name = value; \ |
| return(ctxt->name##Nr++); \ |
| } \ |
| scope type name##Pop(xmlParserCtxtPtr ctxt) { \ |
| type ret; \ |
| if (ctxt->name##Nr <= 0) return(0); \ |
| ctxt->name##Nr--; \ |
| if (ctxt->name##Nr > 0) \ |
| ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ |
| else \ |
| ctxt->name = NULL; \ |
| ret = ctxt->name##Tab[ctxt->name##Nr]; \ |
| ctxt->name##Tab[ctxt->name##Nr] = 0; \ |
| return(ret); \ |
| } \ |
| |
| /* |
| * Those macros actually generate the functions |
| */ |
| PUSH_AND_POP(extern, xmlParserInputPtr, input) |
| PUSH_AND_POP(extern, xmlNodePtr, node) |
| PUSH_AND_POP(extern, xmlChar*, name) |
| |
| static int spacePush(xmlParserCtxtPtr ctxt, int val) { |
| if (ctxt->spaceNr >= ctxt->spaceMax) { |
| ctxt->spaceMax *= 2; |
| ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab, |
| ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); |
| if (ctxt->spaceTab == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "realloc failed !\n"); |
| return(0); |
| } |
| } |
| ctxt->spaceTab[ctxt->spaceNr] = val; |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; |
| return(ctxt->spaceNr++); |
| } |
| |
| static int spacePop(xmlParserCtxtPtr ctxt) { |
| int ret; |
| if (ctxt->spaceNr <= 0) return(0); |
| ctxt->spaceNr--; |
| if (ctxt->spaceNr > 0) |
| ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; |
| else |
| ctxt->space = NULL; |
| ret = ctxt->spaceTab[ctxt->spaceNr]; |
| ctxt->spaceTab[ctxt->spaceNr] = -1; |
| return(ret); |
| } |
| |
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named "ctxt" (the parser
 * context) to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar of the input.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done: it evaluates to -1 while
 *           a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. Hands over to
 *           xmlParserHandlePEReference() when landing on a '%' and pops
 *           the input when it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character (calls xmlNextChar()); per the
 *           original notes this does the proper decoding in UTF-8 mode
 *           and pops finished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar, growing the input when its end is
 *           reached; no line/column or entity handling.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating line/column and
 *           clearing any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding.
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW and NEXT1 expand to a bare block and COPY_BUF to a
 * bare if/else. They must be used as a full statement and never as the
 * unbraced body of an if that is followed by an else. Their shape is kept
 * as is because callers outside of this section rely on it.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/* every macro parameter is parenthesized so that expression arguments
 * (e.g. "a + b") are evaluated as a unit */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
| |
| /** |
| * xmlSkipBlankChars: |
| * @ctxt: the XML parser context |
| * |
| * skip all blanks character found at that point in the input streams. |
| * It pops up finished entities in the process if allowable at that point. |
| * |
| * Returns the number of space chars skipped |
| */ |
| |
| int |
| xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { |
| int cur, res = 0; |
| |
| /* |
| * It's Okay to use CUR/NEXT here since all the blanks are on |
| * the ASCII range. |
| */ |
| do { |
| cur = CUR; |
| while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ |
| NEXT; |
| cur = CUR; |
| res++; |
| } |
| while ((cur == 0) && (ctxt->inputNr > 1) && |
| (ctxt->instate != XML_PARSER_COMMENT)) { |
| xmlPopInput(ctxt); |
| cur = CUR; |
| } |
| /* |
| * Need to handle support of entities branching here |
| */ |
| if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); |
| /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */ |
| } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ |
| return(res); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions to handle entities * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlPopInput: |
| * @ctxt: an XML parser context |
| * |
| * xmlPopInput: the current input pointed by ctxt->input came to an end |
| * pop it and return the next char. |
| * |
| * Returns the current xmlChar in the parser context |
| */ |
| xmlChar |
| xmlPopInput(xmlParserCtxtPtr ctxt) { |
| if (ctxt->inputNr == 1) return(0); /* End of main Input */ |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "Popping input %d\n", ctxt->inputNr); |
| xmlFreeInputStream(inputPop(ctxt)); |
| if ((*ctxt->input->cur == 0) && |
| (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) |
| return(xmlPopInput(ctxt)); |
| return(CUR); |
| } |
| |
| /** |
| * xmlPushInput: |
| * @ctxt: an XML parser context |
| * @input: an XML parser input fragment (entity, XML fragment ...). |
| * |
| * xmlPushInput: switch to a new input stream which is stacked on top |
| * of the previous one(s). |
| */ |
| void |
| xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
| if (input == NULL) return; |
| |
| if (xmlParserDebugEntities) { |
| if ((ctxt->input != NULL) && (ctxt->input->filename)) |
| xmlGenericError(xmlGenericErrorContext, |
| "%s(%d): ", ctxt->input->filename, |
| ctxt->input->line); |
| xmlGenericError(xmlGenericErrorContext, |
| "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); |
| } |
| inputPush(ctxt, input); |
| GROW; |
| } |
| |
| /** |
| * xmlParseCharRef: |
| * @ctxt: an XML parser context |
| * |
| * parse Reference declarations |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error |
| */ |
| int |
| xmlParseCharRef(xmlParserCtxtPtr ctxt) { |
| unsigned int val = 0; |
| int count = 0; |
| |
| if (ctxt->token != 0) { |
| val = ctxt->token; |
| ctxt->token = 0; |
| return(val); |
| } |
| /* |
| * Using RAW/CUR/NEXT is okay since we are working on ASCII range here |
| */ |
| if ((RAW == '&') && (NXT(1) == '#') && |
| (NXT(2) == 'x')) { |
| SKIP(3); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if ((RAW >= '0') && (RAW <= '9') && (count < 20)) |
| val = val * 16 + (CUR - '0'); |
| else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) |
| val = val * 16 + (CUR - 'a') + 10; |
| else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) |
| val = val * 16 + (CUR - 'A') + 10; |
| else { |
| ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseCharRef: invalid hexadecimal value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| val = 0; |
| break; |
| } |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else if ((RAW == '&') && (NXT(1) == '#')) { |
| SKIP(2); |
| GROW; |
| while (RAW != ';') { /* loop blocked by count */ |
| if ((RAW >= '0') && (RAW <= '9') && (count < 20)) |
| val = val * 10 + (CUR - '0'); |
| else { |
| ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseCharRef: invalid decimal value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| val = 0; |
| break; |
| } |
| NEXT; |
| count++; |
| } |
| if (RAW == ';') { |
| /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
| ctxt->nbChars ++; |
| ctxt->input->cur++; |
| } |
| } else { |
| ctxt->errNo = XML_ERR_INVALID_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseCharRef: invalid value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if (IS_CHAR(val)) { |
| return(val); |
| } else { |
| ctxt->errNo = XML_ERR_INVALID_CHAR; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n", |
| val); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlParseStringCharRef: |
| * @ctxt: an XML parser context |
| * @str: a pointer to an index in the string |
| * |
| * parse Reference declarations, variant parsing from a string rather |
| * than an an input flow. |
| * |
| * [66] CharRef ::= '&#' [0-9]+ ';' | |
| * '&#x' [0-9a-fA-F]+ ';' |
| * |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| * |
| * Returns the value parsed (as an int), 0 in case of error, str will be |
| * updated to the current value of the index |
| */ |
| static int |
| xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
| const xmlChar *ptr; |
| xmlChar cur; |
| int val = 0; |
| |
| if ((str == NULL) || (*str == NULL)) return(0); |
| ptr = *str; |
| cur = *ptr; |
| if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { |
| ptr += 3; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loop */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 16 + (cur - '0'); |
| else if ((cur >= 'a') && (cur <= 'f')) |
| val = val * 16 + (cur - 'a') + 10; |
| else if ((cur >= 'A') && (cur <= 'F')) |
| val = val * 16 + (cur - 'A') + 10; |
| else { |
| ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseStringCharRef: invalid hexadecimal value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| val = 0; |
| break; |
| } |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else if ((cur == '&') && (ptr[1] == '#')){ |
| ptr += 2; |
| cur = *ptr; |
| while (cur != ';') { /* Non input consuming loops */ |
| if ((cur >= '0') && (cur <= '9')) |
| val = val * 10 + (cur - '0'); |
| else { |
| ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseStringCharRef: invalid decimal value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| val = 0; |
| break; |
| } |
| ptr++; |
| cur = *ptr; |
| } |
| if (cur == ';') |
| ptr++; |
| } else { |
| ctxt->errNo = XML_ERR_INVALID_CHARREF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseCharRef: invalid value\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(0); |
| } |
| *str = ptr; |
| |
| /* |
| * [ WFC: Legal Character ] |
| * Characters referred to using character references must match the |
| * production for Char. |
| */ |
| if (IS_CHAR(val)) { |
| return(val); |
| } else { |
| ctxt->errNo = XML_ERR_INVALID_CHAR; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "CharRef: invalid xmlChar value %d\n", val); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| return(0); |
| } |
| |
| /** |
| * xmlParserHandlePEReference: |
| * @ctxt: the parser context |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * [ WFC: No Recursion ] |
| * A parsed entity must not contain a recursive |
| * reference to itself, either directly or indirectly. |
| * |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an internal DTD |
| * subset which contains no parameter entity references, or a document |
| * with "standalone='yes'", ... ... The declaration of a parameter |
| * entity must precede any reference to it... |
| * |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external parameter entities |
| * with "standalone='no'", ... ... The declaration of a parameter entity |
| * must precede any reference to it... |
| * |
| * [ WFC: In DTD ] |
| * Parameter-entity references may only appear in the DTD. |
| * NOTE: misleading but this is handled. |
| * |
| * A PEReference may have been detected in the current input stream |
| * the handling is done accordingly to |
| * http://www.w3.org/TR/REC-xml#entproc |
| * i.e. |
| * - Included in literal in entity values |
| * - Included as Paraemeter Entity reference within DTDs |
| */ |
| void |
| xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
| xmlChar *name; |
| xmlEntityPtr entity = NULL; |
| xmlParserInputPtr input; |
| |
| if (ctxt->token != 0) { |
| return; |
| } |
| if (RAW != '%') return; |
| switch(ctxt->instate) { |
| case XML_PARSER_CDATA_SECTION: |
| return; |
| case XML_PARSER_COMMENT: |
| return; |
| case XML_PARSER_START_TAG: |
| return; |
| case XML_PARSER_END_TAG: |
| return; |
| case XML_PARSER_EOF: |
| ctxt->errNo = XML_ERR_PEREF_AT_EOF; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "PEReference at EOF\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return; |
| case XML_PARSER_PROLOG: |
| case XML_PARSER_START: |
| case XML_PARSER_MISC: |
| ctxt->errNo = XML_ERR_PEREF_IN_PROLOG; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return; |
| case XML_PARSER_ENTITY_DECL: |
| case XML_PARSER_CONTENT: |
| case XML_PARSER_ATTRIBUTE_VALUE: |
| case XML_PARSER_PI: |
| case XML_PARSER_SYSTEM_LITERAL: |
| /* we just ignore it there */ |
| return; |
| case XML_PARSER_EPILOG: |
| ctxt->errNo = XML_ERR_PEREF_IN_EPILOG; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return; |
| case XML_PARSER_ENTITY_VALUE: |
| /* |
| * NOTE: in the case of entity values, we don't do the |
| * substitution here since we need the literal |
| * entity value to be able to save the internal |
| * subset of the document. |
| * This will be handled by xmlStringDecodeEntities |
| */ |
| return; |
| case XML_PARSER_DTD: |
| /* |
| * [WFC: Well-Formedness Constraint: PEs in Internal Subset] |
| * In the internal DTD subset, parameter-entity references |
| * can occur only where markup declarations can occur, not |
| * within markup declarations. |
| * In that case this is handled in xmlParseMarkupDecl |
| */ |
| if ((ctxt->external == 0) && (ctxt->inputNr == 1)) |
| return; |
| break; |
| case XML_PARSER_IGNORE: |
| return; |
| } |
| |
| NEXT; |
| name = xmlParseName(ctxt); |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "PE Reference: %s\n", name); |
| if (name == NULL) { |
| ctxt->errNo = XML_ERR_PEREF_NO_NAME; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else { |
| if (RAW == ';') { |
| NEXT; |
| if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) |
| entity = ctxt->sax->getParameterEntity(ctxt->userData, name); |
| if (entity == NULL) { |
| |
| /* |
| * [ WFC: Entity Declared ] |
| * In a document without any DTD, a document with only an |
| * internal DTD subset which contains no parameter entity |
| * references, or a document with "standalone='yes'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((ctxt->standalone == 1) || |
| ((ctxt->hasExternalSubset == 0) && |
| (ctxt->hasPErefs == 0))) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "PEReference: %%%s; not found\n", name); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else { |
| /* |
| * [ VC: Entity Declared ] |
| * In a document with an external subset or external |
| * parameter entities with "standalone='no'", ... |
| * ... The declaration of a parameter entity must precede |
| * any reference to it... |
| */ |
| if ((!ctxt->disableSAX) && |
| (ctxt->validate) && (ctxt->vctxt.error != NULL)) { |
| ctxt->vctxt.error(ctxt->vctxt.userData, |
| "PEReference: %%%s; not found\n", name); |
| } else if ((!ctxt->disableSAX) && |
| (ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) |
| ctxt->sax->warning(ctxt->userData, |
| "PEReference: %%%s; not found\n", name); |
| ctxt->valid = 0; |
| } |
| } else { |
| if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || |
| (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { |
| /* |
| * handle the extra spaces added before and after |
| * c.f. http://www.w3.org/TR/REC-xml#as-PE |
| * this is done independantly. |
| */ |
| input = xmlNewEntityInputStream(ctxt, entity); |
| xmlPushInput(ctxt, input); |
| if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
| (RAW == '<') && (NXT(1) == '?') && |
| (NXT(2) == 'x') && (NXT(3) == 'm') && |
| (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { |
| xmlParseTextDecl(ctxt); |
| } |
| if (ctxt->token == 0) |
| ctxt->token = ' '; |
| } else { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlHandlePEReference: %s is not a parameter entity\n", |
| name); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| } |
| } else { |
| ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlHandlePEReference: expecting ';'\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| xmlFree(name); |
| } |
| } |
| |
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of the xmlChar buffer held in the variable
 * "buffer", whose size is held in the companion variable "buffer_size"
 * (the argument name with the _size suffix).
 * On allocation failure the enclosing function returns NULL; the old
 * block is released first and the variable reset, so that nothing is
 * leaked. The macro can therefore only be used inside functions returning
 * a pointer, and expands to a bare block.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_tmp = (xmlChar *)                                 \
        xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));        \
    if (buffer##_tmp == NULL) {                                         \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_tmp;                                              \
    buffer##_size *= 2;                                                 \
}
| |
| /** |
| * xmlStringDecodeEntities: |
| * @ctxt: the parser context |
| * @str: the input string |
| * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
| * @end: an end marker xmlChar, 0 if none |
| * @end2: an end marker xmlChar, 0 if none |
| * @end3: an end marker xmlChar, 0 if none |
| * |
| * Takes a entity string content and process to do the adequate subtitutions. |
| * |
| * [67] Reference ::= EntityRef | CharRef |
| * |
| * [69] PEReference ::= '%' Name ';' |
| * |
| * Returns A newly allocated string with the substitution done. The caller |
| * must deallocate it ! |
| */ |
| xmlChar * |
| xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, |
| xmlChar end, xmlChar end2, xmlChar end3) { |
| xmlChar *buffer = NULL; |
| int buffer_size = 0; |
| |
| xmlChar *current = NULL; |
| xmlEntityPtr ent; |
| int c,l; |
| int nbchars = 0; |
| |
| if (str == NULL) |
| return(NULL); |
| |
| if (ctxt->depth > 40) { |
| ctxt->errNo = XML_ERR_ENTITY_LOOP; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Detected entity reference loop\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(NULL); |
| } |
| |
| /* |
| * allocate a translation buffer. |
| */ |
| buffer_size = XML_PARSER_BIG_BUFFER_SIZE; |
| buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| perror("xmlDecodeEntities: malloc failed"); |
| return(NULL); |
| } |
| |
| /* |
| * Ok loop until we reach one of the ending char or a size limit. |
| * we are operating on already parsed values. |
| */ |
| c = CUR_SCHAR(str, l); |
| while ((c != 0) && (c != end) && /* non input consuming loop */ |
| (c != end2) && (c != end3)) { |
| |
| if (c == 0) break; |
| if ((c == '&') && (str[1] == '#')) { |
| int val = xmlParseStringCharRef(ctxt, &str); |
| if (val != 0) { |
| COPY_BUF(0,buffer,nbchars,val); |
| } |
| } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding Entity Reference: %.30s\n", |
| str); |
| ent = xmlParseStringEntityRef(ctxt, &str); |
| if ((ent != NULL) && |
| (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
| if (ent->content != NULL) { |
| COPY_BUF(0,buffer,nbchars,ent->content[0]); |
| } else { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "internal error entity has no content\n"); |
| } |
| } else if ((ent != NULL) && (ent->content != NULL)) { |
| xmlChar *rep; |
| |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars > |
| buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| xmlFree(rep); |
| } |
| } else if (ent != NULL) { |
| int i = xmlStrlen(ent->name); |
| const xmlChar *cur = ent->name; |
| |
| buffer[nbchars++] = '&'; |
| if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| for (;i > 0;i--) |
| buffer[nbchars++] = *cur++; |
| buffer[nbchars++] = ';'; |
| } |
| } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { |
| if (xmlParserDebugEntities) |
| xmlGenericError(xmlGenericErrorContext, |
| "String decoding PE Reference: %.30s\n", str); |
| ent = xmlParseStringPEReference(ctxt, &str); |
| if (ent != NULL) { |
| xmlChar *rep; |
| |
| ctxt->depth++; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, what, |
| 0, 0, 0); |
| ctxt->depth--; |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming loop */ |
| buffer[nbchars++] = *current++; |
| if (nbchars > |
| buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| xmlFree(rep); |
| } |
| } |
| } else { |
| COPY_BUF(l,buffer,nbchars,c); |
| str += l; |
| if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { |
| growBuffer(buffer); |
| } |
| } |
| c = CUR_SCHAR(str, l); |
| } |
| buffer[nbchars++] = 0; |
| return(buffer); |
| } |
| |
| |
| /************************************************************************ |
| * * |
| * Commodity functions to handle xmlChars * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlStrndup: |
| * @cur: the input xmlChar * |
| * @len: the len of @cur |
| * |
| * a strndup for array of xmlChar's |
| * |
| * Returns a new xmlChar * or NULL |
| */ |
| xmlChar * |
| xmlStrndup(const xmlChar *cur, int len) { |
| xmlChar *ret; |
| |
| if ((cur == NULL) || (len < 0)) return(NULL); |
| ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); |
| if (ret == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %ld byte failed\n", |
| (len + 1) * (long)sizeof(xmlChar)); |
| return(NULL); |
| } |
| memcpy(ret, cur, len * sizeof(xmlChar)); |
| ret[len] = 0; |
| return(ret); |
| } |
| |
| /** |
| * xmlStrdup: |
| * @cur: the input xmlChar * |
| * |
| * a strdup for array of xmlChar's. Since they are supposed to be |
| * encoded in UTF-8 or an encoding with 8bit based chars, we assume |
| * a termination mark of '0'. |
| * |
| * Returns a new xmlChar * or NULL |
| */ |
| xmlChar * |
| xmlStrdup(const xmlChar *cur) { |
| const xmlChar *p = cur; |
| |
| if (cur == NULL) return(NULL); |
| while (*p != 0) p++; /* non input consuming */ |
| return(xmlStrndup(cur, p - cur)); |
| } |
| |
| /** |
| * xmlCharStrndup: |
| * @cur: the input char * |
| * @len: the len of @cur |
| * |
| * a strndup for char's to xmlChar's |
| * |
| * Returns a new xmlChar * or NULL |
| */ |
| |
| xmlChar * |
| xmlCharStrndup(const char *cur, int len) { |
| int i; |
| xmlChar *ret; |
| |
| if ((cur == NULL) || (len < 0)) return(NULL); |
| ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar)); |
| if (ret == NULL) { |
| xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n", |
| (len + 1) * (long)sizeof(xmlChar)); |
| return(NULL); |
| } |
| for (i = 0;i < len;i++) |
| ret[i] = (xmlChar) cur[i]; |
| ret[len] = 0; |
| return(ret); |
| } |
| |
| /** |
| * xmlCharStrdup: |
| * @cur: the input char * |
| * @len: the len of @cur |
| * |
| * a strdup for char's to xmlChar's |
| * |
| * Returns a new xmlChar * or NULL |
| */ |
| |
| xmlChar * |
| xmlCharStrdup(const char *cur) { |
| const char *p = cur; |
| |
| if (cur == NULL) return(NULL); |
| while (*p != '\0') p++; /* non input consuming */ |
| return(xmlCharStrndup(cur, p - cur)); |
| } |
| |
| /** |
| * xmlStrcmp: |
| * @str1: the first xmlChar * |
| * @str2: the second xmlChar * |
| * |
| * a strcmp for xmlChar's |
| * |
| * Returns the integer result of the comparison |
| */ |
| |
| int |
| xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { |
| register int tmp; |
| |
| if (str1 == str2) return(0); |
| if (str1 == NULL) return(-1); |
| if (str2 == NULL) return(1); |
| do { |
| tmp = *str1++ - *str2; |
| if (tmp != 0) return(tmp); |
| } while (*str2++ != 0); |
| return 0; |
| } |
| |
| /** |
| * xmlStrEqual: |
| * @str1: the first xmlChar * |
| * @str2: the second xmlChar * |
| * |
| * Check if both string are equal of have same content |
| * Should be a bit more readable and faster than xmlStrEqual() |
| * |
| * Returns 1 if they are equal, 0 if they are different |
| */ |
| |
| int |
| xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { |
| if (str1 == str2) return(1); |
| if (str1 == NULL) return(0); |
| if (str2 == NULL) return(0); |
| do { |
| if (*str1++ != *str2) return(0); |
| } while (*str2++); |
| return(1); |
| } |
| |
| /** |
| * xmlStrncmp: |
| * @str1: the first xmlChar * |
| * @str2: the second xmlChar * |
| * @len: the max comparison length |
| * |
| * a strncmp for xmlChar's |
| * |
| * Returns the integer result of the comparison |
| */ |
| |
| int |
| xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { |
| register int tmp; |
| |
| if (len <= 0) return(0); |
| if (str1 == str2) return(0); |
| if (str1 == NULL) return(-1); |
| if (str2 == NULL) return(1); |
| do { |
| tmp = *str1++ - *str2; |
| if (tmp != 0 || --len == 0) return(tmp); |
| } while (*str2++ != 0); |
| return 0; |
| } |
| |
| static xmlChar casemap[256] = { |
| 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, |
| 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, |
| 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, |
| 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, |
| 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, |
| 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, |
| 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, |
| 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, |
| 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67, |
| 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, |
| 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, |
| 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F, |
| 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, |
| 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, |
| 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, |
| 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, |
| 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, |
| 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, |
| 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, |
| 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, |
| 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, |
| 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, |
| 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, |
| 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, |
| 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, |
| 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, |
| 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, |
| 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, |
| 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, |
| 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, |
| 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, |
| 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF |
| }; |
| |
| /** |
| * xmlStrcasecmp: |
| * @str1: the first xmlChar * |
| * @str2: the second xmlChar * |
| * |
| * a strcasecmp for xmlChar's |
| * |
| * Returns the integer result of the comparison |
| */ |
| |
| int |
| xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { |
| register int tmp; |
| |
| if (str1 == str2) return(0); |
| if (str1 == NULL) return(-1); |
| if (str2 == NULL) return(1); |
| do { |
| tmp = casemap[*str1++] - casemap[*str2]; |
| if (tmp != 0) return(tmp); |
| } while (*str2++ != 0); |
| return 0; |
| } |
| |
| /** |
| * xmlStrncasecmp: |
| * @str1: the first xmlChar * |
| * @str2: the second xmlChar * |
| * @len: the max comparison length |
| * |
| * a strncasecmp for xmlChar's |
| * |
| * Returns the integer result of the comparison |
| */ |
| |
| int |
| xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { |
| register int tmp; |
| |
| if (len <= 0) return(0); |
| if (str1 == str2) return(0); |
| if (str1 == NULL) return(-1); |
| if (str2 == NULL) return(1); |
| do { |
| tmp = casemap[*str1++] - casemap[*str2]; |
| if (tmp != 0 || --len == 0) return(tmp); |
| } while (*str2++ != 0); |
| return 0; |
| } |
| |
| /** |
| * xmlStrchr: |
| * @str: the xmlChar * array |
| * @val: the xmlChar to search |
| * |
| * a strchr for xmlChar's |
| * |
| * Returns the xmlChar * for the first occurence or NULL. |
| */ |
| |
| const xmlChar * |
| xmlStrchr(const xmlChar *str, xmlChar val) { |
| if (str == NULL) return(NULL); |
| while (*str != 0) { /* non input consuming */ |
| if (*str == val) return((xmlChar *) str); |
| str++; |
| } |
| return(NULL); |
| } |
| |
| /** |
| * xmlStrstr: |
| * @str: the xmlChar * array (haystack) |
| * @val: the xmlChar to search (needle) |
| * |
| * a strstr for xmlChar's |
| * |
| * Returns the xmlChar * for the first occurence or NULL. |
| */ |
| |
| const xmlChar * |
| xmlStrstr(const xmlChar *str, xmlChar *val) { |
| int n; |
| |
| if (str == NULL) return(NULL); |
| if (val == NULL) return(NULL); |
| n = xmlStrlen(val); |
| |
| if (n == 0) return(str); |
| while (*str != 0) { /* non input consuming */ |
| if (*str == *val) { |
| if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); |
| } |
| str++; |
| } |
| return(NULL); |
| } |
| |
| /** |
| * xmlStrcasestr: |
| * @str: the xmlChar * array (haystack) |
| * @val: the xmlChar to search (needle) |
| * |
| * a case-ignoring strstr for xmlChar's |
| * |
| * Returns the xmlChar * for the first occurence or NULL. |
| */ |
| |
| const xmlChar * |
| xmlStrcasestr(const xmlChar *str, xmlChar *val) { |
| int n; |
| |
| if (str == NULL) return(NULL); |
| if (val == NULL) return(NULL); |
| n = xmlStrlen(val); |
| |
| if (n == 0) return(str); |
| while (*str != 0) { /* non input consuming */ |
| if (casemap[*str] == casemap[*val]) |
| if (!xmlStrncasecmp(str, val, n)) return(str); |
| str++; |
| } |
| return(NULL); |
| } |
| |
| /** |
| * xmlStrsub: |
| * @str: the xmlChar * array (haystack) |
| * @start: the index of the first char (zero based) |
| * @len: the length of the substring |
| * |
| * Extract a substring of a given string |
| * |
| * Returns the xmlChar * for the first occurence or NULL. |
| */ |
| |
| xmlChar * |
| xmlStrsub(const xmlChar *str, int start, int len) { |
| int i; |
| |
| if (str == NULL) return(NULL); |
| if (start < 0) return(NULL); |
| if (len < 0) return(NULL); |
| |
| for (i = 0;i < start;i++) { |
| if (*str == 0) return(NULL); |
| str++; |
| } |
| if (*str == 0) return(NULL); |
| return(xmlStrndup(str, len)); |
| } |
| |
| /** |
| * xmlStrlen: |
| * @str: the xmlChar * array |
| * |
| * length of a xmlChar's string |
| * |
| * Returns the number of xmlChar contained in the ARRAY. |
| */ |
| |
| int |
| xmlStrlen(const xmlChar *str) { |
| int len = 0; |
| |
| if (str == NULL) return(0); |
| while (*str != 0) { /* non input consuming */ |
| str++; |
| len++; |
| } |
| return(len); |
| } |
| |
| /** |
| * xmlStrncat: |
| * @cur: the original xmlChar * array |
| * @add: the xmlChar * array added |
| * @len: the length of @add |
| * |
| * a strncat for array of xmlChar's, it will extend cur with the len |
| * first bytes of @add. |
| * |
| * Returns a new xmlChar *, the original @cur is reallocated if needed |
| * and should not be freed |
| */ |
| |
| xmlChar * |
| xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { |
| int size; |
| xmlChar *ret; |
| |
| if ((add == NULL) || (len == 0)) |
| return(cur); |
| if (cur == NULL) |
| return(xmlStrndup(add, len)); |
| |
| size = xmlStrlen(cur); |
| ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar)); |
| if (ret == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "xmlStrncat: realloc of %ld byte failed\n", |
| (size + len + 1) * (long)sizeof(xmlChar)); |
| return(cur); |
| } |
| memcpy(&ret[size], add, len * sizeof(xmlChar)); |
| ret[size + len] = 0; |
| return(ret); |
| } |
| |
| /** |
| * xmlStrcat: |
| * @cur: the original xmlChar * array |
| * @add: the xmlChar * array added |
| * |
| * a strcat for array of xmlChar's. Since they are supposed to be |
| * encoded in UTF-8 or an encoding with 8bit based chars, we assume |
| * a termination mark of '0'. |
| * |
| * Returns a new xmlChar * containing the concatenated string. |
| */ |
| xmlChar * |
| xmlStrcat(xmlChar *cur, const xmlChar *add) { |
| const xmlChar *p = add; |
| |
| if (add == NULL) return(cur); |
| if (cur == NULL) |
| return(xmlStrdup(add)); |
| |
| while (*p != 0) p++; /* non input consuming */ |
| return(xmlStrncat(cur, add, p - add)); |
| } |
| |
| /************************************************************************ |
| * * |
| * Commodity functions, cleanup needed ? * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * areBlanks: |
| * @ctxt: an XML parser context |
| * @str: a xmlChar * |
| * @len: the size of @str |
| * |
| * Is this a sequence of blank chars that one can ignore ? |
| * |
| * Returns 1 if ignorable 0 otherwise. |
| */ |
| |
| static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { |
| int i, ret; |
| xmlNodePtr lastChild; |
| |
| if (ctxt->keepBlanks) |
| return(0); |
| |
| /* |
| * Check for xml:space value. |
| */ |
| if (*(ctxt->space) == 1) |
| return(0); |
| |
| /* |
| * Check that the string is made of blanks |
| */ |
| for (i = 0;i < len;i++) |
| if (!(IS_BLANK(str[i]))) return(0); |
| |
| /* |
| * Look if the element is mixed content in the Dtd if available |
| */ |
| if (ctxt->myDoc != NULL) { |
| ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); |
| if (ret == 0) return(1); |
| if (ret == 1) return(0); |
| } |
| |
| /* |
| * Otherwise, heuristic :-\ |
| */ |
| if (RAW != '<') return(0); |
| if (ctxt->node == NULL) return(0); |
| if ((ctxt->node->children == NULL) && |
| (RAW == '<') && (NXT(1) == '/')) return(0); |
| |
| lastChild = xmlGetLastChild(ctxt->node); |
| if (lastChild == NULL) { |
| if (ctxt->node->content != NULL) return(0); |
| } else if (xmlNodeIsText(lastChild)) |
| return(0); |
| else if ((ctxt->node->children != NULL) && |
| (xmlNodeIsText(ctxt->node->children))) |
| return(0); |
| return(1); |
| } |
| |
| /* |
| * Forward declarations for recursive behaviour. |
| */ |
| void xmlParsePEReference(xmlParserCtxtPtr ctxt); |
| void xmlParseReference(xmlParserCtxtPtr ctxt); |
| |
| /************************************************************************ |
| * * |
| * Extra stuff for namespace support * |
| * Relates to http://www.w3.org/TR/WD-xml-names * |
| * * |
| ************************************************************************/ |
| |
| /** |
| * xmlSplitQName: |
| * @ctxt: an XML parser context |
| * @name: an XML parser context |
| * @prefix: a xmlChar ** |
| * |
| * parse an UTF8 encoded XML qualified name string |
| * |
| * [NS 5] QName ::= (Prefix ':')? LocalPart |
| * |
| * [NS 6] Prefix ::= NCName |
| * |
| * [NS 7] LocalPart ::= NCName |
| * |
| * Returns the local part, and prefix is updated |
| * to get the Prefix if any. |
| */ |
| |
| xmlChar * |
| xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| xmlChar *buffer = NULL; |
| int len = 0; |
| int max = XML_MAX_NAMELEN; |
| xmlChar *ret = NULL; |
| const xmlChar *cur = name; |
| int c; |
| |
| *prefix = NULL; |
| |
| #ifndef XML_XML_NAMESPACE |
| /* xml: prefix is not really a namespace */ |
| if ((cur[0] == 'x') && (cur[1] == 'm') && |
| (cur[2] == 'l') && (cur[3] == ':')) |
| return(xmlStrdup(name)); |
| #endif |
| |
| /* nasty but valid */ |
| if (cur[0] == ':') |
| return(xmlStrdup(name)); |
| |
| c = *cur++; |
| while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ |
| buf[len++] = c; |
| c = *cur++; |
| } |
| if (len >= max) { |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| max = len * 2; |
| |
| buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlSplitQName: out of memory\n"); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((c != 0) && (c != ':')) { /* tested bigname.xml */ |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlSplitQName: out of memory\n"); |
| return(NULL); |
| } |
| } |
| buffer[len++] = c; |
| c = *cur++; |
| } |
| buffer[len] = 0; |
| } |
| |
| if (buffer == NULL) |
| ret = xmlStrndup(buf, len); |
| else { |
| ret = buffer; |
| buffer = NULL; |
| max = XML_MAX_NAMELEN; |
| } |
| |
| |
| if (c == ':') { |
| c = *cur++; |
| if (c == 0) return(ret); |
| *prefix = ret; |
| len = 0; |
| |
| while ((c != 0) && (len < max)) { /* tested bigname2.xml */ |
| buf[len++] = c; |
| c = *cur++; |
| } |
| if (len >= max) { |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| max = len * 2; |
| |
| buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlSplitQName: out of memory\n"); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while (c != 0) { /* tested bigname2.xml */ |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlSplitQName: out of memory\n"); |
| return(NULL); |
| } |
| } |
| buffer[len++] = c; |
| c = *cur++; |
| } |
| buffer[len] = 0; |
| } |
| |
| if (buffer == NULL) |
| ret = xmlStrndup(buf, len); |
| else { |
| ret = buffer; |
| } |
| } |
| |
| return(ret); |
| } |
| |
| /************************************************************************ |
| * * |
| * The parser itself * |
| * Relates to http://www.w3.org/TR/REC-xml * |
| * * |
| ************************************************************************/ |
| |
| xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt); |
| /** |
| * xmlParseName: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML name. |
| * |
| * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
| * CombiningChar | Extender |
| * |
| * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
| * |
| * [6] Names ::= Name (S Name)* |
| * |
| * Returns the Name parsed or NULL |
| */ |
| |
| xmlChar * |
| xmlParseName(xmlParserCtxtPtr ctxt) { |
| const xmlChar *in; |
| xmlChar *ret; |
| int count = 0; |
| |
| GROW; |
| |
| /* |
| * Accelerator for simple ASCII names |
| */ |
| in = ctxt->input->cur; |
| if (((*in >= 0x61) && (*in <= 0x7A)) || |
| ((*in >= 0x41) && (*in <= 0x5A)) || |
| (*in == '_') || (*in == ':')) { |
| in++; |
| while (((*in >= 0x61) && (*in <= 0x7A)) || |
| ((*in >= 0x41) && (*in <= 0x5A)) || |
| ((*in >= 0x30) && (*in <= 0x39)) || |
| (*in == '_') || (*in == ':')) |
| in++; |
| if ((*in == ' ') || (*in == '>') || (*in == '/')) { |
| count = in - ctxt->input->cur; |
| ret = xmlStrndup(ctxt->input->cur, count); |
| ctxt->input->cur = in; |
| return(ret); |
| } |
| } |
| return(xmlParseNameComplex(ctxt)); |
| } |
| |
| xmlChar * |
| xmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| int len = 0, l; |
| int c; |
| int count = 0; |
| |
| /* |
| * Handler for more complex cases |
| */ |
| GROW; |
| c = CUR_CHAR(l); |
| if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
| (!IS_LETTER(c) && (c != '_') && |
| (c != ':'))) { |
| return(NULL); |
| } |
| |
| while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
| ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c)))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| COPY_BUF(l,buf,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| if (len >= XML_MAX_NAMELEN) { |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| xmlChar *buffer; |
| int max = len * 2; |
| |
| buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseNameComplex: out of memory\n"); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseNameComplex: out of memory\n"); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buffer,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| } |
| buffer[len] = 0; |
| return(buffer); |
| } |
| } |
| return(xmlStrndup(buf, len)); |
| } |
| |
| /** |
| * xmlParseStringName: |
| * @ctxt: an XML parser context |
| * @str: a pointer to the string pointer (IN/OUT) |
| * |
| * parse an XML name. |
| * |
| * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
| * CombiningChar | Extender |
| * |
| * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
| * |
| * [6] Names ::= Name (S Name)* |
| * |
| * Returns the Name parsed or NULL. The str pointer |
| * is updated to the current location in the string. |
| */ |
| |
| static xmlChar * |
| xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| const xmlChar *cur = *str; |
| int len = 0, l; |
| int c; |
| |
| c = CUR_SCHAR(cur, l); |
| if (!IS_LETTER(c) && (c != '_') && |
| (c != ':')) { |
| return(NULL); |
| } |
| |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| COPY_BUF(l,buf,len,c); |
| cur += l; |
| c = CUR_SCHAR(cur, l); |
| if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ |
| /* |
| * Okay someone managed to make a huge name, so he's ready to pay |
| * for the processing speed. |
| */ |
| xmlChar *buffer; |
| int max = len * 2; |
| |
| buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseStringName: out of memory\n"); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseStringName: out of memory\n"); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buffer,len,c); |
| cur += l; |
| c = CUR_SCHAR(cur, l); |
| } |
| buffer[len] = 0; |
| *str = cur; |
| return(buffer); |
| } |
| } |
| *str = cur; |
| return(xmlStrndup(buf, len)); |
| } |
| |
| /** |
| * xmlParseNmtoken: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML Nmtoken. |
| * |
| * [7] Nmtoken ::= (NameChar)+ |
| * |
| * [8] Nmtokens ::= Nmtoken (S Nmtoken)* |
| * |
| * Returns the Nmtoken parsed or NULL |
| */ |
| |
| xmlChar * |
| xmlParseNmtoken(xmlParserCtxtPtr ctxt) { |
| xmlChar buf[XML_MAX_NAMELEN + 5]; |
| int len = 0, l; |
| int c; |
| int count = 0; |
| |
| GROW; |
| c = CUR_CHAR(l); |
| |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| COPY_BUF(l,buf,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| if (len >= XML_MAX_NAMELEN) { |
| /* |
| * Okay someone managed to make a huge token, so he's ready to pay |
| * for the processing speed. |
| */ |
| xmlChar *buffer; |
| int max = len * 2; |
| |
| buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseNmtoken: out of memory\n"); |
| return(NULL); |
| } |
| memcpy(buffer, buf, len); |
| while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ |
| (c == '.') || (c == '-') || |
| (c == '_') || (c == ':') || |
| (IS_COMBINING(c)) || |
| (IS_EXTENDER(c))) { |
| if (count++ > 100) { |
| count = 0; |
| GROW; |
| } |
| if (len + 10 > max) { |
| max *= 2; |
| buffer = (xmlChar *) xmlRealloc(buffer, |
| max * sizeof(xmlChar)); |
| if (buffer == NULL) { |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseNameComplex: out of memory\n"); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buffer,len,c); |
| NEXTL(l); |
| c = CUR_CHAR(l); |
| } |
| buffer[len] = 0; |
| return(buffer); |
| } |
| } |
| if (len == 0) |
| return(NULL); |
| return(xmlStrndup(buf, len)); |
| } |
| |
| /** |
| * xmlParseEntityValue: |
| * @ctxt: an XML parser context |
| * @orig: if non-NULL store a copy of the original entity value |
| * |
| * parse a value for ENTITY declarations |
| * |
| * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | |
| * "'" ([^%&'] | PEReference | Reference)* "'" |
| * |
| * Returns the EntityValue parsed with reference substitued or NULL |
| */ |
| |
| xmlChar * |
| xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int c, l; |
| xmlChar stop; |
| xmlChar *ret = NULL; |
| const xmlChar *cur = NULL; |
| xmlParserInputPtr input; |
| |
| if (RAW == '"') stop = '"'; |
| else if (RAW == '\'') stop = '\''; |
| else { |
| ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(NULL); |
| } |
| buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %d byte failed\n", size); |
| return(NULL); |
| } |
| |
| /* |
| * The content of the entity definition is copied in a buffer. |
| */ |
| |
| ctxt->instate = XML_PARSER_ENTITY_VALUE; |
| input = ctxt->input; |
| GROW; |
| NEXT; |
| c = CUR_CHAR(l); |
| /* |
| * NOTE: 4.4.5 Included in Literal |
| * When a parameter entity reference appears in a literal entity |
| * value, ... a single or double quote character in the replacement |
| * text is always treated as a normal data character and will not |
| * terminate the literal. |
| * In practice it means we stop the loop only when back at parsing |
| * the initial entity and the quote is found |
| */ |
| while ((IS_CHAR(c)) && ((c != stop) || /* checked */ |
| (ctxt->input != input))) { |
| if (len + 5 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "realloc of %d byte failed\n", size); |
| return(NULL); |
| } |
| } |
| COPY_BUF(l,buf,len,c); |
| NEXTL(l); |
| /* |
| * Pop-up of finished entities. |
| */ |
| while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ |
| xmlPopInput(ctxt); |
| |
| GROW; |
| c = CUR_CHAR(l); |
| if (c == 0) { |
| GROW; |
| c = CUR_CHAR(l); |
| } |
| } |
| buf[len] = 0; |
| |
| /* |
| * Raise problem w.r.t. '&' and '%' being used in non-entities |
| * reference constructs. Note Charref will be handled in |
| * xmlStringDecodeEntities() |
| */ |
| cur = buf; |
| while (*cur != 0) { /* non input consuming */ |
| if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { |
| xmlChar *name; |
| xmlChar tmp = *cur; |
| |
| cur++; |
| name = xmlParseStringName(ctxt, &cur); |
| if ((name == NULL) || (*cur != ';')) { |
| ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "EntityValue: '%c' forbidden except for entities references\n", |
| tmp); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| if ((ctxt->inSubset == 1) && (tmp == '%')) { |
| ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "EntityValue: PEReferences forbidden in internal subset\n", |
| tmp); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| if (name != NULL) |
| xmlFree(name); |
| } |
| cur++; |
| } |
| |
| /* |
| * Then PEReference entities are substituted. |
| */ |
| if (c != stop) { |
| ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| xmlFree(buf); |
| } else { |
| NEXT; |
| /* |
| * NOTE: 4.4.7 Bypassed |
| * When a general entity reference appears in the EntityValue in |
| * an entity declaration, it is bypassed and left as is. |
| * so XML_SUBSTITUTE_REF is not set here. |
| */ |
| ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, |
| 0, 0, 0); |
| if (orig != NULL) |
| *orig = buf; |
| else |
| xmlFree(buf); |
| } |
| |
| return(ret); |
| } |
| |
| /** |
| * xmlParseAttValue: |
| * @ctxt: an XML parser context |
| * |
| * parse a value for an attribute |
| * Note: the parser won't do substitution of entities here, this |
| * will be handled later in xmlStringGetNodeList |
| * |
| * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | |
| * "'" ([^<&'] | Reference)* "'" |
| * |
| * 3.3.3 Attribute-Value Normalization: |
| * Before the value of an attribute is passed to the application or |
| * checked for validity, the XML processor must normalize it as follows: |
| * - a character reference is processed by appending the referenced |
| * character to the attribute value |
| * - an entity reference is processed by recursively processing the |
| * replacement text of the entity |
| * - a whitespace character (#x20, #xD, #xA, #x9) is processed by |
| * appending #x20 to the normalized value, except that only a single |
| * #x20 is appended for a "#xD#xA" sequence that is part of an external |
| * parsed entity or the literal entity value of an internal parsed entity |
| * - other characters are processed by appending them to the normalized value |
| * If the declared value is not CDATA, then the XML processor must further |
| * process the normalized attribute value by discarding any leading and |
| * trailing space (#x20) characters, and by replacing sequences of space |
| * (#x20) characters by a single space (#x20) character. |
| * All attributes for which no declaration has been read should be treated |
| * by a non-validating parser as if declared CDATA. |
| * |
| * Returns the AttValue parsed or NULL. The value has to be freed by the caller. |
| */ |
| |
| xmlChar * |
| xmlParseAttValue(xmlParserCtxtPtr ctxt) { |
| xmlChar limit = 0; |
| xmlChar *buf = NULL; |
| int len = 0; |
| int buf_size = 0; |
| int c, l; |
| xmlChar *current = NULL; |
| xmlEntityPtr ent; |
| |
| |
| SHRINK; |
| if (NXT(0) == '"') { |
| ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
| limit = '"'; |
| NEXT; |
| } else if (NXT(0) == '\'') { |
| limit = '\''; |
| ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
| NEXT; |
| } else { |
| ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(NULL); |
| } |
| |
| /* |
| * allocate a translation buffer. |
| */ |
| buf_size = XML_PARSER_BUFFER_SIZE; |
| buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| perror("xmlParseAttValue: malloc failed"); |
| return(NULL); |
| } |
| |
| /* |
| * Ok loop until we reach one of the ending char or a size limit. |
| */ |
| c = CUR_CHAR(l); |
| while (((NXT(0) != limit) && /* checked */ |
| (c != '<')) || (ctxt->token != 0)) { |
| if (c == 0) break; |
| if (ctxt->token == '&') { |
| /* |
| * The reparsing will be done in xmlStringGetNodeList() |
| * called by the attribute() function in SAX.c |
| */ |
| static xmlChar buffer[6] = "&"; |
| |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| current = &buffer[0]; |
| while (*current != 0) { /* non input consuming */ |
| buf[len++] = *current++; |
| } |
| ctxt->token = 0; |
| } else if (c == '&') { |
| if (NXT(1) == '#') { |
| int val = xmlParseCharRef(ctxt); |
| if (val == '&') { |
| /* |
| * The reparsing will be done in xmlStringGetNodeList() |
| * called by the attribute() function in SAX.c |
| */ |
| static xmlChar buffer[6] = "&"; |
| |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| current = &buffer[0]; |
| while (*current != 0) { /* non input consuming */ |
| buf[len++] = *current++; |
| } |
| } else { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| len += xmlCopyChar(0, &buf[len], val); |
| } |
| } else { |
| ent = xmlParseEntityRef(ctxt); |
| if ((ent != NULL) && |
| (ctxt->replaceEntities != 0)) { |
| xmlChar *rep; |
| |
| if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { |
| rep = xmlStringDecodeEntities(ctxt, ent->content, |
| XML_SUBSTITUTE_REF, 0, 0, 0); |
| if (rep != NULL) { |
| current = rep; |
| while (*current != 0) { /* non input consuming */ |
| buf[len++] = *current++; |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } |
| xmlFree(rep); |
| } |
| } else { |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| if (ent->content != NULL) |
| buf[len++] = ent->content[0]; |
| } |
| } else if (ent != NULL) { |
| int i = xmlStrlen(ent->name); |
| const xmlChar *cur = ent->name; |
| |
| /* |
| * This may look absurd but is needed to detect |
| * entities problems |
| */ |
| if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
| (ent->content != NULL)) { |
| xmlChar *rep; |
| rep = xmlStringDecodeEntities(ctxt, ent->content, |
| XML_SUBSTITUTE_REF, 0, 0, 0); |
| if (rep != NULL) |
| xmlFree(rep); |
| } |
| |
| /* |
| * Just output the reference |
| */ |
| buf[len++] = '&'; |
| if (len > buf_size - i - 10) { |
| growBuffer(buf); |
| } |
| for (;i > 0;i--) |
| buf[len++] = *cur++; |
| buf[len++] = ';'; |
| } |
| } |
| } else { |
| if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { |
| COPY_BUF(l,buf,len,0x20); |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } else { |
| COPY_BUF(l,buf,len,c); |
| if (len > buf_size - 10) { |
| growBuffer(buf); |
| } |
| } |
| NEXTL(l); |
| } |
| GROW; |
| c = CUR_CHAR(l); |
| } |
| buf[len++] = 0; |
| if (RAW == '<') { |
| ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Unescaped '<' not allowed in attributes values\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else if (RAW != limit) { |
| ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else |
| NEXT; |
| return(buf); |
| } |
| |
| /** |
| * xmlParseSystemLiteral: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML Literal |
| * |
| * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
| * |
| * Returns the SystemLiteral parsed or NULL |
| */ |
| |
| xmlChar * |
| xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int cur, l; |
| xmlChar stop; |
| int state = ctxt->instate; |
| int count = 0; |
| |
| SHRINK; |
| if (RAW == '"') { |
| NEXT; |
| stop = '"'; |
| } else if (RAW == '\'') { |
| NEXT; |
| stop = '\''; |
| } else { |
| ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "SystemLiteral \" or ' expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(NULL); |
| } |
| |
| buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %d byte failed\n", size); |
| return(NULL); |
| } |
| ctxt->instate = XML_PARSER_SYSTEM_LITERAL; |
| cur = CUR_CHAR(l); |
| while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ |
| if (len + 5 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "realloc of %d byte failed\n", size); |
| ctxt->instate = (xmlParserInputState) state; |
| return(NULL); |
| } |
| } |
| count++; |
| if (count > 50) { |
| GROW; |
| count = 0; |
| } |
| COPY_BUF(l,buf,len,cur); |
| NEXTL(l); |
| cur = CUR_CHAR(l); |
| if (cur == 0) { |
| GROW; |
| SHRINK; |
| cur = CUR_CHAR(l); |
| } |
| } |
| buf[len] = 0; |
| ctxt->instate = (xmlParserInputState) state; |
| if (!IS_CHAR(cur)) { |
| ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else { |
| NEXT; |
| } |
| return(buf); |
| } |
| |
| /** |
| * xmlParsePubidLiteral: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML public literal |
| * |
| * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
| * |
| * Returns the PubidLiteral parsed or NULL. |
| */ |
| |
| xmlChar * |
| xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| xmlChar cur; |
| xmlChar stop; |
| int count = 0; |
| |
| SHRINK; |
| if (RAW == '"') { |
| NEXT; |
| stop = '"'; |
| } else if (RAW == '\'') { |
| NEXT; |
| stop = '\''; |
| } else { |
| ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "SystemLiteral \" or ' expected\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(NULL); |
| } |
| buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %d byte failed\n", size); |
| return(NULL); |
| } |
| cur = CUR; |
| while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */ |
| if (len + 1 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "realloc of %d byte failed\n", size); |
| return(NULL); |
| } |
| } |
| buf[len++] = cur; |
| count++; |
| if (count > 50) { |
| GROW; |
| count = 0; |
| } |
| NEXT; |
| cur = CUR; |
| if (cur == 0) { |
| GROW; |
| SHRINK; |
| cur = CUR; |
| } |
| } |
| buf[len] = 0; |
| if (cur != stop) { |
| ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } else { |
| NEXT; |
| } |
| return(buf); |
| } |
| |
| void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); |
| /** |
| * xmlParseCharData: |
| * @ctxt: an XML parser context |
| * @cdata: int indicating whether we are within a CDATA section |
| * |
| * parse a CharData section. |
| * if we are within a CDATA section ']]>' marks an end of section. |
| * |
| * The right angle bracket (>) may be represented using the string ">", |
| * and must, for compatibility, be escaped using ">" or a character |
| * reference when it appears in the string "]]>" in content, when that |
| * string is not marking the end of a CDATA section. |
| * |
| * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) |
| */ |
| |
| void |
| xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { |
| const xmlChar *in; |
| int nbchar = 0; |
| int line = ctxt->input->line; |
| int col = ctxt->input->col; |
| |
| SHRINK; |
| GROW; |
| /* |
| * Accelerated common case where input don't need to be |
| * modified before passing it to the handler. |
| */ |
| if ((ctxt->token == 0) && (!cdata)) { |
| in = ctxt->input->cur; |
| do { |
| while (((*in >= 0x20) && (*in != '<') && |
| (*in != '&') && (*in <= 0x7F)) || (*in == 0x09)) |
| in++; |
| if (*in == 0xA) { |
| ctxt->input->line++; |
| continue; /* while */ |
| } |
| nbchar = in - ctxt->input->cur; |
| if (nbchar > 0) { |
| if (IS_BLANK(*ctxt->input->cur) && |
| areBlanks(ctxt, ctxt->input->cur, nbchar)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| ctxt->input->cur, nbchar); |
| } else { |
| if (ctxt->sax->characters != NULL) |
| ctxt->sax->characters(ctxt->userData, |
| ctxt->input->cur, nbchar); |
| } |
| } |
| ctxt->input->cur = in; |
| if (*in == 0xD) { |
| in++; |
| if (*in == 0xA) { |
| ctxt->input->cur = in; |
| in++; |
| ctxt->input->line++; |
| continue; /* while */ |
| } |
| in--; |
| } |
| if (*in == '<') { |
| return; |
| } |
| if (*in == '&') { |
| return; |
| } |
| SHRINK; |
| GROW; |
| in = ctxt->input->cur; |
| } while ((*in >= 0x20) && (*in <= 0x7F)); |
| nbchar = 0; |
| } |
| ctxt->input->line = line; |
| ctxt->input->col = col; |
| xmlParseCharDataComplex(ctxt, cdata); |
| } |
| |
| void |
| xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { |
| xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; |
| int nbchar = 0; |
| int cur, l; |
| int count = 0; |
| |
| SHRINK; |
| GROW; |
| cur = CUR_CHAR(l); |
| while (((cur != '<') || (ctxt->token == '<')) && /* checked */ |
| ((cur != '&') || (ctxt->token == '&')) && |
| (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { |
| if ((cur == ']') && (NXT(1) == ']') && |
| (NXT(2) == '>')) { |
| if (cdata) break; |
| else { |
| ctxt->errNo = XML_ERR_MISPLACED_CDATA_END; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Sequence ']]>' not allowed in content\n"); |
| /* Should this be relaxed ??? I see a "must here */ |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| } |
| COPY_BUF(l,buf,nbchar,cur); |
| if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { |
| /* |
| * Ok the segment is to be consumed as chars. |
| */ |
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| if (areBlanks(ctxt, buf, nbchar)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, |
| buf, nbchar); |
| } else { |
| if (ctxt->sax->characters != NULL) |
| ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } |
| } |
| nbchar = 0; |
| } |
| count++; |
| if (count > 50) { |
| GROW; |
| count = 0; |
| } |
| NEXTL(l); |
| cur = CUR_CHAR(l); |
| } |
| if (nbchar != 0) { |
| /* |
| * Ok the segment is to be consumed as chars. |
| */ |
| if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
| if (areBlanks(ctxt, buf, nbchar)) { |
| if (ctxt->sax->ignorableWhitespace != NULL) |
| ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); |
| } else { |
| if (ctxt->sax->characters != NULL) |
| ctxt->sax->characters(ctxt->userData, buf, nbchar); |
| } |
| } |
| } |
| } |
| |
| /** |
| * xmlParseExternalID: |
| * @ctxt: an XML parser context |
| * @publicID: a xmlChar** receiving PubidLiteral |
| * @strict: indicate whether we should restrict parsing to only |
| * production [75], see NOTE below |
| * |
| * Parse an External ID or a Public ID |
| * |
| * NOTE: Productions [75] and [83] interract badly since [75] can generate |
| * 'PUBLIC' S PubidLiteral S SystemLiteral |
| * |
| * [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
| * | 'PUBLIC' S PubidLiteral S SystemLiteral |
| * |
| * [83] PublicID ::= 'PUBLIC' S PubidLiteral |
| * |
| * Returns the function returns SystemLiteral and in the second |
| * case publicID receives PubidLiteral, is strict is off |
| * it is possible to return NULL and have publicID set. |
| */ |
| |
| xmlChar * |
| xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { |
| xmlChar *URI = NULL; |
| |
| SHRINK; |
| |
| *publicID = NULL; |
| if ((RAW == 'S') && (NXT(1) == 'Y') && |
| (NXT(2) == 'S') && (NXT(3) == 'T') && |
| (NXT(4) == 'E') && (NXT(5) == 'M')) { |
| SKIP(6); |
| if (!IS_BLANK(CUR)) { |
| ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Space required after 'SYSTEM'\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| SKIP_BLANKS; |
| URI = xmlParseSystemLiteral(ctxt); |
| if (URI == NULL) { |
| ctxt->errNo = XML_ERR_URI_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseExternalID: SYSTEM, no URI\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| } else if ((RAW == 'P') && (NXT(1) == 'U') && |
| (NXT(2) == 'B') && (NXT(3) == 'L') && |
| (NXT(4) == 'I') && (NXT(5) == 'C')) { |
| SKIP(6); |
| if (!IS_BLANK(CUR)) { |
| ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Space required after 'PUBLIC'\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| SKIP_BLANKS; |
| *publicID = xmlParsePubidLiteral(ctxt); |
| if (*publicID == NULL) { |
| ctxt->errNo = XML_ERR_PUBID_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseExternalID: PUBLIC, no Public Identifier\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| if (strict) { |
| /* |
| * We don't handle [83] so "S SystemLiteral" is required. |
| */ |
| if (!IS_BLANK(CUR)) { |
| ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Space required after the Public Identifier\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| } else { |
| /* |
| * We handle [83] so we return immediately, if |
| * "S SystemLiteral" is not detected. From a purely parsing |
| * point of view that's a nice mess. |
| */ |
| const xmlChar *ptr; |
| GROW; |
| |
| ptr = CUR_PTR; |
| if (!IS_BLANK(*ptr)) return(NULL); |
| |
| while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */ |
| if ((*ptr != '\'') && (*ptr != '"')) return(NULL); |
| } |
| SKIP_BLANKS; |
| URI = xmlParseSystemLiteral(ctxt); |
| if (URI == NULL) { |
| ctxt->errNo = XML_ERR_URI_REQUIRED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "xmlParseExternalID: PUBLIC, no URI\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| } |
| return(URI); |
| } |
| |
| /** |
| * xmlParseComment: |
| * @ctxt: an XML parser context |
| * |
| * Skip an XML (SGML) comment <!-- .... --> |
| * The spec says that "For compatibility, the string "--" (double-hyphen) |
| * must not occur within comments. " |
| * |
| * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
| */ |
| void |
| xmlParseComment(xmlParserCtxtPtr ctxt) { |
| xmlChar *buf = NULL; |
| int len; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int q, ql; |
| int r, rl; |
| int cur, l; |
| xmlParserInputState state; |
| xmlParserInputPtr input = ctxt->input; |
| int count = 0; |
| |
| /* |
| * Check that there is a comment right here. |
| */ |
| if ((RAW != '<') || (NXT(1) != '!') || |
| (NXT(2) != '-') || (NXT(3) != '-')) return; |
| |
| state = ctxt->instate; |
| ctxt->instate = XML_PARSER_COMMENT; |
| SHRINK; |
| SKIP(4); |
| buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %d byte failed\n", size); |
| ctxt->instate = state; |
| return; |
| } |
| q = CUR_CHAR(ql); |
| NEXTL(ql); |
| r = CUR_CHAR(rl); |
| NEXTL(rl); |
| cur = CUR_CHAR(l); |
| len = 0; |
| while (IS_CHAR(cur) && /* checked */ |
| ((cur != '>') || |
| (r != '-') || (q != '-'))) { |
| if ((r == '-') && (q == '-') && (len > 1)) { |
| ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Comment must not contain '--' (double-hyphen)`\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| if (len + 5 >= size) { |
| size *= 2; |
| buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "realloc of %d byte failed\n", size); |
| ctxt->instate = state; |
| return; |
| } |
| } |
| COPY_BUF(ql,buf,len,q); |
| q = r; |
| ql = rl; |
| r = cur; |
| rl = l; |
| |
| count++; |
| if (count > 50) { |
| GROW; |
| count = 0; |
| } |
| NEXTL(l); |
| cur = CUR_CHAR(l); |
| if (cur == 0) { |
| SHRINK; |
| GROW; |
| cur = CUR_CHAR(l); |
| } |
| } |
| buf[len] = 0; |
| if (!IS_CHAR(cur)) { |
| ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Comment not terminated \n<!--%.50s\n", buf); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| xmlFree(buf); |
| } else { |
| if (input != ctxt->input) { |
| ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "Comment doesn't start and stop in the same entity\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| NEXT; |
| if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && |
| (!ctxt->disableSAX)) |
| ctxt->sax->comment(ctxt->userData, buf); |
| xmlFree(buf); |
| } |
| ctxt->instate = state; |
| } |
| |
| /** |
| * xmlParsePITarget: |
| * @ctxt: an XML parser context |
| * |
| * parse the name of a PI |
| * |
| * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) |
| * |
| * Returns the PITarget name or NULL |
| */ |
| |
| xmlChar * |
| xmlParsePITarget(xmlParserCtxtPtr ctxt) { |
| xmlChar *name; |
| |
| name = xmlParseName(ctxt); |
| if ((name != NULL) && |
| ((name[0] == 'x') || (name[0] == 'X')) && |
| ((name[1] == 'm') || (name[1] == 'M')) && |
| ((name[2] == 'l') || (name[2] == 'L'))) { |
| int i; |
| if ((name[0] == 'x') && (name[1] == 'm') && |
| (name[2] == 'l') && (name[3] == 0)) { |
| ctxt->errNo = XML_ERR_RESERVED_XML_NAME; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "XML declaration allowed only at the start of the document\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(name); |
| } else if (name[3] == 0) { |
| ctxt->errNo = XML_ERR_RESERVED_XML_NAME; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, "Invalid PI name\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| return(name); |
| } |
| for (i = 0;;i++) { |
| if (xmlW3CPIs[i] == NULL) break; |
| if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) |
| return(name); |
| } |
| if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) { |
| ctxt->errNo = XML_ERR_RESERVED_XML_NAME; |
| ctxt->sax->warning(ctxt->userData, |
| "xmlParsePItarget: invalid name prefix 'xml'\n"); |
| } |
| } |
| return(name); |
| } |
| |
| /** |
| * xmlParsePI: |
| * @ctxt: an XML parser context |
| * |
| * parse an XML Processing Instruction. |
| * |
| * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
| * |
| * The processing is transferred to SAX once parsed. |
| */ |
| |
| void |
| xmlParsePI(xmlParserCtxtPtr ctxt) { |
| xmlChar *buf = NULL; |
| int len = 0; |
| int size = XML_PARSER_BUFFER_SIZE; |
| int cur, l; |
| xmlChar *target; |
| xmlParserInputState state; |
| int count = 0; |
| |
| if ((RAW == '<') && (NXT(1) == '?')) { |
| xmlParserInputPtr input = ctxt->input; |
| state = ctxt->instate; |
| ctxt->instate = XML_PARSER_PI; |
| /* |
| * this is a Processing Instruction. |
| */ |
| SKIP(2); |
| SHRINK; |
| |
| /* |
| * Parse the target name and check for special support like |
| * namespace. |
| */ |
| target = xmlParsePITarget(ctxt); |
| if (target != NULL) { |
| if ((RAW == '?') && (NXT(1) == '>')) { |
| if (input != ctxt->input) { |
| ctxt->errNo = XML_ERR_ENTITY_BOUNDARY; |
| if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) |
| ctxt->sax->error(ctxt->userData, |
| "PI declaration doesn't start and stop in the same entity\n"); |
| ctxt->wellFormed = 0; |
| ctxt->disableSAX = 1; |
| } |
| SKIP(2); |
| |
| /* |
| * SAX: PI detected. |
| */ |
| if ((ctxt->sax) && (!ctxt->disableSAX) && |
| (ctxt->sax->processingInstruction != NULL)) |
| ctxt->sax->processingInstruction(ctxt->userData, |
| target, NULL); |
| ctxt->instate = state; |
| xmlFree(target); |
| return; |
| } |
| buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); |
| if (buf == NULL) { |
| xmlGenericError(xmlGenericErrorContext, |
| "malloc of %d byte failed\n", size); |
| ctxt->instate = state; |
| return; |
| } |
| cur = CUR; |
| if (!IS_BLANK(cur)) { |
| ctxt->errNo = XML_ERR_SPACE_REQUIRED; |
| if |