blob: ee4c8087fe867ae17c29a1f1ad4bd9a830574d07 [file] [log] [blame]
* parser.h : Interfaces, constants and types related to the XML parser.
* See Copyright for the status of this software.
#ifndef __XML_PARSER_H__
#define __XML_PARSER_H__
#include <libxml/tree.h>
#include <libxml/valid.h>
#include <libxml/xmlIO.h>
#include <libxml/entities.h>
#ifdef __cplusplus
extern "C" {
* Constants.
* an xmlParserInput is an input flow for the XML processor.
* Each entity parsed is associated an xmlParserInput (except the
* few predefined ones). This is the case both for internal entities
* - in which case the flow is already completely in memory - or
* external entities - in which case we use the buf structure for
* progressive reading and I18N conversions to the internal UTF-8 format.
typedef void (* xmlParserInputDeallocate)(xmlChar *);
typedef struct _xmlParserInput xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
struct _xmlParserInput {
/* Input buffer */
xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
const char *filename; /* The file analyzed, if any */
const char *directory; /* the directory/base of teh file */
const xmlChar *base; /* Base of the array to parse */
const xmlChar *cur; /* Current char being parsed */
int length; /* length if known */
int line; /* Current line */
int col; /* Current column */
int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
const xmlChar *encoding; /* the encoding string for entity */
const xmlChar *version; /* the version string for entity */
int standalone; /* Was that entity marked standalone */
* the parser can be asked to collect Node informations, i.e. at what
* place in the file they were detected.
* NOTE: This is off by default and not very well tested.
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
struct _xmlParserNodeInfo {
const struct _xmlNode* node;
/* Position & line # that text that created the node begins & ends on */
unsigned long begin_pos;
unsigned long begin_line;
unsigned long end_pos;
unsigned long end_line;
typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
struct _xmlParserNodeInfoSeq {
unsigned long maximum;
unsigned long length;
xmlParserNodeInfo* buffer;
* The parser is now working also as a state based parser
* The recursive one use the stagte info for entities processing
typedef enum {
XML_PARSER_EOF = -1, /* nothing is to be parsed */
XML_PARSER_START = 0, /* nothing has been parsed */
XML_PARSER_MISC, /* Misc* before int subset */
XML_PARSER_PI, /* Whithin a processing instruction */
XML_PARSER_DTD, /* within some DTD content */
XML_PARSER_PROLOG, /* Misc* after internal subset */
XML_PARSER_COMMENT, /* within a comment */
XML_PARSER_START_TAG, /* within a start tag */
XML_PARSER_CONTENT, /* within the content */
XML_PARSER_CDATA_SECTION, /* within a CDATA section */
XML_PARSER_END_TAG, /* within a closing tag */
XML_PARSER_ENTITY_DECL, /* within an entity declaration */
XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
* The parser context.
* NOTE This doesn't completely defines the parser state, the (current ?)
* design of the parser uses recursive function calls since this allow
* and easy mapping from the production rules of the specification
* to the actual code. The drawback is that the actual function call
* also reflect the parser state. However most of the parsing routines
* takes as the only argument the parser context pointer, so migrating
* to a state based parser for progressive parsing shouldn't be too hard.
typedef struct _xmlParserCtxt xmlParserCtxt;
typedef xmlParserCtxt *xmlParserCtxtPtr;
struct _xmlParserCtxt {
struct _xmlSAXHandler *sax; /* The SAX handler */
void *userData; /* For SAX interface only, used by DOM build */
xmlDocPtr myDoc; /* the document being built */
int wellFormed; /* is the document well formed */
int replaceEntities; /* shall we replace entities ? */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* the declared encoding, if any */
int standalone; /* standalone document */
int html; /* an HTML(1)/Docbook(2) document */
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
int inputNr; /* Number of current input streams */
int inputMax; /* Max number of input streams */
xmlParserInputPtr *inputTab; /* stack of inputs */
/* Node analysis stack only used for DOM building */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
xmlNodePtr *nodeTab; /* array of nodes */
int record_info; /* Whether node info should be kept */
xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
int errNo; /* error code */
int hasExternalSubset; /* reference and external subset */
int hasPErefs; /* the internal subset has PE refs */
int external; /* are we parsing an external entity */
int valid; /* is the document valid */
int validate; /* shall we try to validate ? */
xmlValidCtxt vctxt; /* The validity context */
xmlParserInputState instate; /* current type of input */
int token; /* next char look-ahead */
char *directory; /* the data directory */
/* Node name stack */
xmlChar *name; /* Current parsed Node */
int nameNr; /* Depth of the parsing stack */
int nameMax; /* Max depth of the parsing stack */
xmlChar * *nameTab; /* array of nodes */
long nbChars; /* number of xmlChar processed */
long checkIndex; /* used by progressive parsing lookup */
int keepBlanks; /* ugly but ... */
int disableSAX; /* SAX callbacks are disabled */
int inSubset; /* Parsing is in int 1/ext 2 subset */
xmlChar * intSubName; /* name of subset */
xmlChar * extSubURI; /* URI of external subset */
xmlChar * extSubSystem; /* SYSTEM ID of external subset */
/* xml:space values */
int * space; /* Should the parser preserve spaces */
int spaceNr; /* Depth of the parsing stack */
int spaceMax; /* Max depth of the parsing stack */
int * spaceTab; /* array of space infos */
int depth; /* to prevent entity substitution loops */
xmlParserInputPtr entity; /* used to check entities boundaries */
int charset; /* encoding of the in-memory content
actually an xmlCharEncoding */
int nodelen; /* Those two fields are there to */
int nodemem; /* Speed up large node parsing */
int pedantic; /* signal pedantic warnings */
void *_private; /* For user data, libxml won't touch it */
* a SAX Locator.
typedef struct _xmlSAXLocator xmlSAXLocator;
typedef xmlSAXLocator *xmlSAXLocatorPtr;
struct _xmlSAXLocator {
const xmlChar *(*getPublicId)(void *ctx);
const xmlChar *(*getSystemId)(void *ctx);
int (*getLineNumber)(void *ctx);
int (*getColumnNumber)(void *ctx);
* a SAX handler is bunch of callbacks called by the parser when processing
* of the input generate data or structure informations.
typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
const xmlChar *publicId, const xmlChar *systemId);
typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *ExternalID, const xmlChar *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const xmlChar *name);
typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
const xmlChar *name);
typedef void (*entityDeclSAXFunc) (void *ctx,
const xmlChar *name, int type, const xmlChar *publicId,
const xmlChar *systemId, xmlChar *content);
typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name,
const xmlChar *publicId, const xmlChar *systemId);
typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem,
const xmlChar *name, int type, int def,
const xmlChar *defaultValue, xmlEnumerationPtr tree);
typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name,
int type, xmlElementContentPtr content);
typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
const xmlChar *name, const xmlChar *publicId,
const xmlChar *systemId, const xmlChar *notationName);
typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
xmlSAXLocatorPtr loc);
typedef void (*startDocumentSAXFunc) (void *ctx);
typedef void (*endDocumentSAXFunc) (void *ctx);
typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar **atts);
typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name,
const xmlChar *value);
typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch,
int len);
typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
const xmlChar *ch, int len);
typedef void (*processingInstructionSAXFunc) (void *ctx,
const xmlChar *target, const xmlChar *data);
typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value);
typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
typedef int (*isStandaloneSAXFunc) (void *ctx);
typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
typedef struct _xmlSAXHandler xmlSAXHandler;
typedef xmlSAXHandler *xmlSAXHandlerPtr;
struct _xmlSAXHandler {
internalSubsetSAXFunc internalSubset;
isStandaloneSAXFunc isStandalone;
hasInternalSubsetSAXFunc hasInternalSubset;
hasExternalSubsetSAXFunc hasExternalSubset;
resolveEntitySAXFunc resolveEntity;
getEntitySAXFunc getEntity;
entityDeclSAXFunc entityDecl;
notationDeclSAXFunc notationDecl;
attributeDeclSAXFunc attributeDecl;
elementDeclSAXFunc elementDecl;
unparsedEntityDeclSAXFunc unparsedEntityDecl;
setDocumentLocatorSAXFunc setDocumentLocator;
startDocumentSAXFunc startDocument;
endDocumentSAXFunc endDocument;
startElementSAXFunc startElement;
endElementSAXFunc endElement;
referenceSAXFunc reference;
charactersSAXFunc characters;
ignorableWhitespaceSAXFunc ignorableWhitespace;
processingInstructionSAXFunc processingInstruction;
commentSAXFunc comment;
warningSAXFunc warning;
errorSAXFunc error;
fatalErrorSAXFunc fatalError;
getParameterEntitySAXFunc getParameterEntity;
cdataBlockSAXFunc cdataBlock;
externalSubsetSAXFunc externalSubset;
* External entity loaders types
typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
const char *ID,
xmlParserCtxtPtr context);
* Global variables: just the default SAX interface tables and XML
* version infos.
extern const char *xmlParserVersion;
extern xmlSAXLocator xmlDefaultSAXLocator;
extern xmlSAXHandler xmlDefaultSAXHandler;
extern xmlSAXHandler htmlDefaultSAXHandler;
extern xmlSAXHandler sgmlDefaultSAXHandler;
* entity substitution default behaviour.
extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlGetWarningsDefaultValue;
* Init/Cleanup
void xmlInitParser (void);
void xmlCleanupParser (void);
* Input functions
int xmlParserInputRead (xmlParserInputPtr in,
int len);
int xmlParserInputGrow (xmlParserInputPtr in,
int len);
* xmlChar handling
xmlChar * xmlStrdup (const xmlChar *cur);
xmlChar * xmlStrndup (const xmlChar *cur,
int len);
xmlChar * xmlStrsub (const xmlChar *str,
int start,
int len);
const xmlChar * xmlStrchr (const xmlChar *str,
xmlChar val);
const xmlChar * xmlStrstr (const xmlChar *str,
xmlChar *val);
const xmlChar * xmlStrcasestr (const xmlChar *str,
xmlChar *val);
int xmlStrcmp (const xmlChar *str1,
const xmlChar *str2);
int xmlStrncmp (const xmlChar *str1,
const xmlChar *str2,
int len);
int xmlStrcasecmp (const xmlChar *str1,
const xmlChar *str2);
int xmlStrncasecmp (const xmlChar *str1,
const xmlChar *str2,
int len);
int xmlStrEqual (const xmlChar *str1,
const xmlChar *str2);
int xmlStrlen (const xmlChar *str);
xmlChar * xmlStrcat (xmlChar *cur,
const xmlChar *add);
xmlChar * xmlStrncat (xmlChar *cur,
const xmlChar *add,
int len);
* Basic parsing Interfaces
xmlDocPtr xmlParseDoc (xmlChar *cur);
xmlDocPtr xmlParseMemory (char *buffer,
int size);
xmlDocPtr xmlParseFile (const char *filename);
int xmlSubstituteEntitiesDefault(int val);
int xmlKeepBlanksDefault (int val);
void xmlStopParser (xmlParserCtxtPtr ctxt);
int xmlPedanticParserDefault(int val);
* Recovery mode
xmlDocPtr xmlRecoverDoc (xmlChar *cur);
xmlDocPtr xmlRecoverMemory (char *buffer,
int size);
xmlDocPtr xmlRecoverFile (const char *filename);
* Less common routines and SAX interfaces
int xmlParseDocument (xmlParserCtxtPtr ctxt);
int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
xmlChar *cur,
int recovery);
int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
void *user_data,
const char *filename);
int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
void *user_data,
char *buffer,
int size);
xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
char *buffer,
int size,
int recovery);
xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
const char *filename,
int recovery);
xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax,
const char *filename);
xmlDocPtr xmlParseEntity (const char *filename);
xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
const xmlChar *SystemID);
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax,
xmlParserInputBufferPtr input,
xmlCharEncoding enc);
int xmlParseBalancedChunkMemory(xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *string,
xmlNodePtr *list);
int xmlParseExternalEntity (xmlDocPtr doc,
xmlSAXHandlerPtr sax,
void *user_data,
int depth,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
const xmlChar *URL,
const xmlChar *ID,
xmlNodePtr *list);
* SAX initialization routines
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
* Parser contexts handling.
void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
const xmlChar* buffer,
const char* filename);
xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
* Reading/setting optional parsing features.
int xmlGetFeaturesList (int *len,
const char **result);
int xmlGetFeature (xmlParserCtxtPtr ctxt,
const char *name,
void *result);
int xmlSetFeature (xmlParserCtxtPtr ctxt,
const char *name,
void *value);
* Interfaces for the Push mode
xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
void *user_data,
const char *chunk,
int size,
const char *filename);
int xmlParseChunk (xmlParserCtxtPtr ctxt,
const char *chunk,
int size,
int terminate);
* Special I/O mode
xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
void *user_data,
xmlInputReadCallback ioread,
xmlInputCloseCallback ioclose,
void *ioctx,
xmlCharEncoding enc);
xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
xmlParserInputBufferPtr input,
xmlCharEncoding enc);
* Node infos
const xmlParserNodeInfo*
xmlParserFindNodeInfo (const xmlParserCtxt* ctxt,
const xmlNode* node);
void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
const xmlNode* node);
void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
const xmlParserNodeInfo* info);
* External entities handling actually implemented in xmlIO
void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
xmlLoadExternalEntity (const char *URL,
const char *ID,
xmlParserCtxtPtr context);
#ifdef __cplusplus
#endif /* __XML_PARSER_H__ */