| * Summary: interface for an HTML 4.0 non-verifying parser | |
| * Description: this module implements an HTML 4.0 non-verifying parser | |
| * with API compatible with the XML parser ones. It should | |
| * be able to parse "real world" HTML, even if severely | |
| * broken from a specification point of view. | |
| * | |
| * Copy: See Copyright for the status of this software. | |
| * | |
| * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A. | |
| /if not defined(HTML_PARSER_H__) | |
| /define HTML_PARSER_H__ | |
| /include "libxmlrpg/xmlversion" | |
| /if defined(LIBXML_HTML_ENABLED) | |
| /include "libxmlrpg/xmlTypesC" | |
| /include "libxmlrpg/parser" | |
| * Most of the back-end structures from XML and HTML are shared. | |
| * htmlParserCtxtPtr: type template declared LIKE xmlParserCtxtPtr. | |
| * It is based on the ######typedef###### placeholder pointer, so it | |
| * exists to be referenced through LIKE by other definitions. | |
| * htmlParserCtxt: data structure based on htmlParserCtxtPtr whose | |
| * layout is copied from xmlParserCtxt with LIKEDS. | |
| d htmlParserCtxtPtr... | |
| d s based(######typedef######) | |
| d like(xmlParserCtxtPtr) | |
| d htmlParserCtxt ds based(htmlParserCtxtPtr) | |
| d likeds(xmlParserCtxt) | |
| * htmlParserNodeInfoPtr: type template declared LIKE | |
| * xmlParserNodeInfoPtr (based on the ######typedef###### placeholder). | |
| * htmlParserNodeInfo: data structure based on htmlParserNodeInfoPtr | |
| * whose layout is copied from xmlParserNodeInfo with LIKEDS. | |
| d htmlParserNodeInfoPtr... | |
| d s based(######typedef######) | |
| d like(xmlParserNodeInfoPtr) | |
| d htmlParserNodeInfo... | |
| d ds based(htmlParserNodeInfoPtr) | |
| d likeds(xmlParserNodeInfo) | |
| * htmlSAXHandlerPtr: type template declared LIKE xmlSAXHandlerPtr | |
| * (based on the ######typedef###### placeholder). | |
| * htmlSAXHandler: data structure based on htmlSAXHandlerPtr whose | |
| * layout is copied from xmlSAXHandler with LIKEDS. | |
| d htmlSAXHandlerPtr... | |
| d s based(######typedef######) | |
| d like(xmlSAXHandlerPtr) | |
| d htmlSAXHandler ds based(htmlSAXHandlerPtr) | |
| d likeds(xmlSAXHandler) | |
| * htmlParserInputPtr: type template declared LIKE xmlParserInputPtr | |
| * (based on the ######typedef###### placeholder). | |
| * htmlParserInput: data structure based on htmlParserInputPtr whose | |
| * layout is copied from xmlParserInput with LIKEDS. | |
| d htmlParserInputPtr... | |
| d s based(######typedef######) | |
| d like(xmlParserInputPtr) | |
| d htmlParserInput... | |
| d ds based(htmlParserInputPtr) | |
| d likeds(xmlParserInput) | |
| * htmlDocPtr: type template declared LIKE xmlDocPtr (based on the | |
| * ######typedef###### placeholder); used below as the return type of | |
| * the parse/read prototypes. | |
| d htmlDocPtr s based(######typedef######) | |
| d like(xmlDocPtr) | |
| * htmlNodePtr: type template declared LIKE xmlNodePtr (based on the | |
| * ######typedef###### placeholder). | |
| d htmlNodePtr s based(######typedef######) | |
| d like(xmlNodePtr) | |
| * Internal description of an HTML element, representing HTML 4.01 | |
| * and XHTML 1.0 (which share the same structure). | |
| * htmlElemDescPtr: pointer type template (based on the | |
| * ######typedef###### placeholder). | |
| * htmlElemDesc: qualified, aligned data structure based on | |
| * htmlElemDescPtr. It holds a name pointer, seven flags typed like | |
| * xmlCchar, a description pointer, and five further pointer | |
| * subfields (subelts .. attrs_req) annotated with their C types. | |
| * Subfields must be referenced qualified, e.g. htmlElemDesc.name. | |
| d htmlElemDescPtr... | |
| d s * based(######typedef######) | |
| d htmlElemDesc ds based(htmlElemDescPtr) | |
| d align qualified | |
| d name * const char * | |
| d startTag like(xmlCchar) Start tag implied ? | |
| d endTag like(xmlCchar) End tag implied ? | |
| d saveEndTag like(xmlCchar) Save end tag ? | |
| d empty like(xmlCchar) Empty element ? | |
| d depr like(xmlCchar) Deprecated element ? | |
| d dtd like(xmlCchar) Loose DTD/Frameset | |
| d isinline like(xmlCchar) Block 0/inline elem? | |
| d desc * const char * | |
| * | |
| * New fields encapsulating HTML structure | |
| * | |
| * Bugs: | |
| * This is a very limited representation. It fails to tell us when | |
| * an element *requires* subelements (we only have whether they're | |
| * allowed or not), and it doesn't tell us where CDATA and PCDATA | |
| * are allowed. Some element relationships are not fully represented: | |
| * these are flagged with the word MODIFIER | |
| * | |
| d subelts * const char * * | |
| d defaultsubelt * const char * | |
| d attrs_opt * const char * * | |
| d attrs_depr * const char * * | |
| d attrs_req * const char * * | |
| * Internal description of an HTML entity. | |
| * htmlEntityDescPtr: pointer type template (based on the | |
| * ######typedef###### placeholder). | |
| * htmlEntityDesc: qualified, aligned data structure based on | |
| * htmlEntityDescPtr with three subfields: value (typed like | |
| * xmlCuint) and two pointers, name and desc, annotated const char *. | |
| d htmlEntityDescPtr... | |
| d s * based(######typedef######) | |
| d htmlEntityDesc... | |
| d ds based(htmlEntityDescPtr) | |
| d align qualified | |
| d value like(xmlCuint) | |
| d name * const char * | |
| d desc * const char * | |
| * There are only a few public functions. | |
| * Prototype for external procedure 'htmlTagLookup'. | |
| * tag: pointer by value; options(*string) lets the caller pass a | |
| * character expression, which is handed over as a null-terminated | |
| * string. | |
| * Returns a value typed like htmlElemDescPtr. | |
| d htmlTagLookup pr extproc('htmlTagLookup') | |
| d like(htmlElemDescPtr) const | |
| d tag * value options(*string) const xmlChar * | |
| * Prototype for external procedure 'htmlEntityLookup'. | |
| * name: null-terminated string pointer by value (options(*string)). | |
| * Returns a value typed like htmlEntityDescPtr. | |
| d htmlEntityLookup... | |
| d pr extproc('htmlEntityLookup') | |
| d like(htmlEntityDescPtr) const | |
| d name * value options(*string) const xmlChar * | |
| * Prototype for external procedure 'htmlEntityValueLookup'. | |
| * value: passed by value, typed like xmlCuint. | |
| * Returns a value typed like htmlEntityDescPtr. | |
| d htmlEntityValueLookup... | |
| d pr extproc('htmlEntityValueLookup') | |
| d like(htmlEntityDescPtr) const | |
| d value value like(xmlCuint) | |
| * Prototype for external procedure 'htmlIsAutoClosed'. | |
| * doc: passed by value, typed like htmlDocPtr. | |
| * elem: passed by value, typed like htmlNodePtr. | |
| * Returns a value typed like xmlCint. | |
| d htmlIsAutoClosed... | |
| d pr extproc('htmlIsAutoClosed') | |
| d like(xmlCint) | |
| d doc value like(htmlDocPtr) | |
| d elem value like(htmlNodePtr) | |
| * Prototype for external procedure 'htmlAutoCloseTag'. | |
| * doc: passed by value, typed like htmlDocPtr. | |
| * name: null-terminated string pointer by value (options(*string)). | |
| * elem: passed by value, typed like htmlNodePtr. | |
| * Returns a value typed like xmlCint. | |
| d htmlAutoCloseTag... | |
| d pr extproc('htmlAutoCloseTag') | |
| d like(xmlCint) | |
| d doc value like(htmlDocPtr) | |
| d name * value options(*string) const xmlChar * | |
| d elem value like(htmlNodePtr) | |
| * Prototype for external procedure 'htmlParseEntityRef'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * str: pointer declared without VALUE, so it is passed by | |
| * reference (the callee receives the address of the | |
| * caller's pointer). | |
| * Returns a value typed like htmlEntityDescPtr. | |
| d htmlParseEntityRef... | |
| d pr extproc('htmlParseEntityRef') | |
| d like(htmlEntityDescPtr) const | |
| d ctxt value like(htmlParserCtxtPtr) | |
| d str * const xmlChar *(*) | |
| * Prototype for external procedure 'htmlParseCharRef'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * Returns a value typed like xmlCint. | |
| d htmlParseCharRef... | |
| d pr extproc('htmlParseCharRef') | |
| d like(xmlCint) | |
| d ctxt value like(htmlParserCtxtPtr) | |
| * Prototype for external procedure 'htmlParseElement'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * No return value is declared. | |
| d htmlParseElement... | |
| d pr extproc('htmlParseElement') | |
| d ctxt value like(htmlParserCtxtPtr) | |
| * Prototype for external procedure 'htmlNewParserCtxt'. | |
| * Takes no parameters; returns a value typed like | |
| * htmlParserCtxtPtr. | |
| d htmlNewParserCtxt... | |
| d pr extproc('htmlNewParserCtxt') | |
| d like(htmlParserCtxtPtr) | |
| * Prototype for external procedure 'htmlCreateMemoryParserCtxt'. | |
| * buffer: pointer by value with options(*string). | |
| * size: passed by value, typed like xmlCint (presumably the | |
| * byte length of buffer - confirm against the C API). | |
| * Returns a value typed like htmlParserCtxtPtr. | |
| d htmlCreateMemoryParserCtxt... | |
| d pr extproc('htmlCreateMemoryParserCtxt') | |
| d like(htmlParserCtxtPtr) | |
| d buffer * value options(*string) const char * | |
| d size value like(xmlCint) | |
| * Prototype for external procedure 'htmlParseDocument'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * Returns a value typed like xmlCint. | |
| d htmlParseDocument... | |
| d pr extproc('htmlParseDocument') | |
| d like(xmlCint) | |
| d ctxt value like(htmlParserCtxtPtr) | |
| * Prototype for external procedure 'htmlSAXParseDoc'. | |
| * cur: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * sax: passed by value, typed like htmlSAXHandlerPtr. | |
| * userData: untyped pointer by value (annotated void *). | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlSAXParseDoc... | |
| d pr extproc('htmlSAXParseDoc') | |
| d like(htmlDocPtr) | |
| d cur * value options(*string) xmlChar * | |
| d encoding * value options(*string) const char * | |
| d sax value like(htmlSAXHandlerPtr) | |
| d userData * value void * | |
| * Prototype for external procedure 'htmlParseDoc'. | |
| * cur: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlParseDoc pr extproc('htmlParseDoc') | |
| d like(htmlDocPtr) | |
| d cur * value options(*string) xmlChar * | |
| d encoding * value options(*string) const char * | |
| * Prototype for external procedure 'htmlSAXParseFile'. | |
| * filename: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * sax: passed by value, typed like htmlSAXHandlerPtr. | |
| * userData: untyped pointer by value (annotated void *). | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlSAXParseFile... | |
| d pr extproc('htmlSAXParseFile') | |
| d like(htmlDocPtr) | |
| d filename * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d sax value like(htmlSAXHandlerPtr) | |
| d userData * value void * | |
| * Prototype for external procedure 'htmlParseFile'. | |
| * filename: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlParseFile pr extproc('htmlParseFile') | |
| d like(htmlDocPtr) | |
| d filename * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| * Prototype for external procedure 'UTF8ToHtml'. | |
| * out: field of declared length 65535 passed by reference; | |
| * options(*varsize) allows a shorter actual argument. | |
| * outlen: typed like xmlCint, no VALUE, so passed by reference | |
| * (presumably in/out - confirm against the C API). | |
| * in: null-terminated string pointer by value. | |
| * inlen: typed like xmlCint, no VALUE, so passed by reference | |
| * (presumably in/out - confirm against the C API). | |
| * Returns a value typed like xmlCint. | |
| d UTF8ToHtml pr extproc('UTF8ToHtml') | |
| d like(xmlCint) | |
| d out 65535 options(*varsize) unsigned char [] | |
| d outlen like(xmlCint) | |
| d in * value options(*string) const unsigned char* | |
| d inlen like(xmlCint) | |
| * Prototype for external procedure 'htmlEncodeEntities'. | |
| * out: field of declared length 65535 passed by reference; | |
| * options(*varsize) allows a shorter actual argument. | |
| * outlen: typed like xmlCint, no VALUE, so passed by | |
| * reference. | |
| * in: null-terminated string pointer by value. | |
| * inlen: typed like xmlCint, no VALUE, so passed by | |
| * reference. | |
| * quoteChar: passed by value, typed like xmlCint. | |
| * Returns a value typed like xmlCint. | |
| d htmlEncodeEntities... | |
| d pr extproc('htmlEncodeEntities') | |
| d like(xmlCint) | |
| d out 65535 options(*varsize) unsigned char [] | |
| d outlen like(xmlCint) | |
| d in * value options(*string) const unsigned char* | |
| d inlen like(xmlCint) | |
| d quoteChar value like(xmlCint) | |
| * Prototype for external procedure 'htmlIsScriptAttribute'. | |
| * name: null-terminated string pointer by value. | |
| * Returns a value typed like xmlCint. | |
| d htmlIsScriptAttribute... | |
| d pr extproc('htmlIsScriptAttribute') | |
| d like(xmlCint) | |
| d name * value options(*string) const xmlChar * | |
| * Prototype for external procedure 'htmlHandleOmittedElem'. | |
| * val: passed by value, typed like xmlCint. | |
| * Returns a value typed like xmlCint. | |
| d htmlHandleOmittedElem... | |
| d pr extproc('htmlHandleOmittedElem') | |
| d like(xmlCint) | |
| d val value like(xmlCint) | |
| /if defined(LIBXML_PUSH_ENABLED) | |
| * Interfaces for the Push mode. | |
| * Prototype for external procedure 'htmlCreatePushParserCtxt'. | |
| * Only compiled when LIBXML_PUSH_ENABLED is defined. | |
| * sax: passed by value, typed like htmlSAXHandlerPtr. | |
| * user_data: untyped pointer by value (annotated void *). | |
| * chunk: pointer by value with options(*string). | |
| * size: passed by value, typed like xmlCint. | |
| * filename: null-terminated string pointer by value. | |
| * enc: passed by value, typed like xmlCharEncoding. | |
| * Returns a value typed like htmlParserCtxtPtr. | |
| d htmlCreatePushParserCtxt... | |
| d pr extproc('htmlCreatePushParserCtxt') | |
| d like(htmlParserCtxtPtr) | |
| d sax value like(htmlSAXHandlerPtr) | |
| d user_data * value void * | |
| d chunk * value options(*string) const char * | |
| d size value like(xmlCint) | |
| d filename * value options(*string) const char * | |
| d enc value like(xmlCharEncoding) | |
| * Prototype for external procedure 'htmlParseChunk'. | |
| * Only compiled when LIBXML_PUSH_ENABLED is defined. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * chunk: pointer by value with options(*string). | |
| * size: passed by value, typed like xmlCint. | |
| * terminate: passed by value, typed like xmlCint. | |
| * Returns a value typed like xmlCint. | |
| d htmlParseChunk pr extproc('htmlParseChunk') | |
| d like(xmlCint) | |
| d ctxt value like(htmlParserCtxtPtr) | |
| d chunk * value options(*string) const char * | |
| d size value like(xmlCint) | |
| d terminate value like(xmlCint) | |
| /endif LIBXML_PUSH_ENABLED | |
| * Prototype for external procedure 'htmlFreeParserCtxt'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * No return value is declared. | |
| d htmlFreeParserCtxt... | |
| d pr extproc('htmlFreeParserCtxt') | |
| d ctxt value like(htmlParserCtxtPtr) | |
| * New set of simpler/more flexible APIs | |
| * htmlParserOption: | |
| * | |
| * This is the set of HTML parser options that can be passed down | |
| * to the htmlReadDoc() and similar calls. | |
| * htmlParserOption: type template declared LIKE xmlCenum (based on | |
| * the ######typedef###### placeholder), followed by its named | |
| * constants. Each HTML_PARSE_* constant is a 4-byte hexadecimal | |
| * literal with exactly one bit set, so the values are mutually | |
| * distinct bit flags (presumably meant to be combined - confirm | |
| * against the C API). | |
| d htmlParserOption... | |
| d s based(######typedef######) | |
| d like(xmlCenum) | |
| d HTML_PARSE_RECOVER... Relaxed parsing | |
| d c X'00000001' | |
| d HTML_PARSE_NODEFDTD... No default doctype | |
| d c X'00000004' | |
| d HTML_PARSE_NOERROR... No error reports | |
| d c X'00000020' | |
| d HTML_PARSE_NOWARNING... No warning reports | |
| d c X'00000040' | |
| d HTML_PARSE_PEDANTIC... Pedantic err reports | |
| d c X'00000080' | |
| d HTML_PARSE_NOBLANKS... Remove blank nodes | |
| d c X'00000100' | |
| d HTML_PARSE_NONET... Forbid net access | |
| d c X'00000800' | |
| d HTML_PARSE_NOIMPLIED... No implied html/body | |
| d c X'00002000' | |
| d HTML_PARSE_COMPACT... compact small txtnod | |
| d c X'00010000' | |
| d HTML_PARSE_IGNORE_ENC... Ignore encoding hint | |
| d c X'00200000' | |
| * Prototype for external procedure 'htmlCtxtReset'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * No return value is declared. | |
| d htmlCtxtReset pr extproc('htmlCtxtReset') | |
| d ctxt value like(htmlParserCtxtPtr) | |
| * Prototype for external procedure 'htmlCtxtUseOptions'. | |
| * ctxt: passed by value, typed like htmlParserCtxtPtr. | |
| * options: passed by value, typed like xmlCint (presumably a | |
| * combination of HTML_PARSE_* values - confirm). | |
| * Returns a value typed like xmlCint. | |
| d htmlCtxtUseOptions... | |
| d pr extproc('htmlCtxtUseOptions') | |
| d like(xmlCint) | |
| d ctxt value like(htmlParserCtxtPtr) | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlReadDoc'. | |
| * cur: null-terminated string pointer by value. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlReadDoc pr extproc('htmlReadDoc') | |
| d like(htmlDocPtr) | |
| d cur * value options(*string) const xmlChar * | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlReadFile'. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlReadFile pr extproc('htmlReadFile') | |
| d like(htmlDocPtr) | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlReadMemory'. | |
| * buffer: pointer by value with options(*string). | |
| * size: passed by value, typed like xmlCint. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlReadMemory pr extproc('htmlReadMemory') | |
| d like(htmlDocPtr) | |
| d buffer * value options(*string) const char * | |
| d size value like(xmlCint) | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlReadFd'. | |
| * fd: passed by value, typed like xmlCint. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlReadFd pr extproc('htmlReadFd') | |
| d like(htmlDocPtr) | |
| d fd value like(xmlCint) | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlReadIO'. | |
| * ioread: passed by value, typed like xmlInputReadCallback. | |
| * ioclose: passed by value, typed like xmlInputCloseCallback. | |
| * ioctx: untyped pointer by value (annotated void *). | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlReadIO pr extproc('htmlReadIO') | |
| d like(htmlDocPtr) | |
| d ioread value like(xmlInputReadCallback) | |
| d ioclose value like(xmlInputCloseCallback) | |
| d ioctx * value void * | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlCtxtReadDoc'. | |
| * ctxt: passed by value, typed like xmlParserCtxtPtr (the | |
| * type that htmlParserCtxtPtr itself is declared LIKE). | |
| * cur: null-terminated string pointer by value. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlCtxtReadDoc... | |
| d pr extproc('htmlCtxtReadDoc') | |
| d like(htmlDocPtr) | |
| d ctxt value like(xmlParserCtxtPtr) | |
| d cur * value options(*string) const xmlChar * | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlCtxtReadFile'. | |
| * ctxt: passed by value, typed like xmlParserCtxtPtr. | |
| * filename: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlCtxtReadFile... | |
| d pr extproc('htmlCtxtReadFile') | |
| d like(htmlDocPtr) | |
| d ctxt value like(xmlParserCtxtPtr) | |
| d filename * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlCtxtReadMemory'. | |
| * ctxt: passed by value, typed like xmlParserCtxtPtr. | |
| * buffer: pointer by value with options(*string). | |
| * size: passed by value, typed like xmlCint. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlCtxtReadMemory... | |
| d pr extproc('htmlCtxtReadMemory') | |
| d like(htmlDocPtr) | |
| d ctxt value like(xmlParserCtxtPtr) | |
| d buffer * value options(*string) const char * | |
| d size value like(xmlCint) | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlCtxtReadFd'. | |
| * ctxt: passed by value, typed like xmlParserCtxtPtr. | |
| * fd: passed by value, typed like xmlCint. | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlCtxtReadFd pr extproc('htmlCtxtReadFd') | |
| d like(htmlDocPtr) | |
| d ctxt value like(xmlParserCtxtPtr) | |
| d fd value like(xmlCint) | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Prototype for external procedure 'htmlCtxtReadIO'. | |
| * ctxt: passed by value, typed like xmlParserCtxtPtr. | |
| * ioread: passed by value, typed like xmlInputReadCallback. | |
| * ioclose: passed by value, typed like xmlInputCloseCallback. | |
| * ioctx: untyped pointer by value (annotated void *). | |
| * URL: null-terminated string pointer by value. | |
| * encoding: null-terminated string pointer by value. | |
| * options: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlDocPtr. | |
| d htmlCtxtReadIO pr extproc('htmlCtxtReadIO') | |
| d like(htmlDocPtr) | |
| d ctxt value like(xmlParserCtxtPtr) | |
| d ioread value like(xmlInputReadCallback) | |
| d ioclose value like(xmlInputCloseCallback) | |
| d ioctx * value void * | |
| d URL * value options(*string) const char * | |
| d encoding * value options(*string) const char * | |
| d options value like(xmlCint) | |
| * Further knowledge of HTML structure | |
| * htmlStatus: type template declared LIKE xmlCenum (based on the | |
| * ######typedef###### placeholder), followed by its named constants. | |
| * HTML_REQUIRED (X'000C') is X'0008' combined with the HTML_VALID | |
| * bit X'0004', so a test against HTML_VALID also matches it. | |
| d htmlStatus s based(######typedef######) | |
| d like(xmlCenum) | |
| d HTML_NA c X'0000' No check at all | |
| d HTML_INVALID c X'0001' | |
| d HTML_DEPRECATED... | |
| d c X'0002' | |
| d HTML_VALID c X'0004' | |
| d HTML_REQUIRED c X'000C' HTML_VALID ored-in | |
| * Using htmlElemDesc rather than name here, to emphasise the fact | |
| * that otherwise there's a lookup overhead | |
| * Prototype for external procedure 'htmlAttrAllowed'. | |
| * Parameters carry placeholder names (#param1..#param3): | |
| * #param1: passed by value, typed like htmlElemDescPtr. | |
| * #param2: null-terminated string pointer by value. | |
| * #param3: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlStatus. | |
| d htmlAttrAllowed... | |
| d pr extproc('htmlAttrAllowed') | |
| d like(htmlStatus) | |
| d #param1 value like(htmlElemDescPtr) const | |
| d #param2 * value options(*string) const xmlChar * | |
| d #param3 value like(xmlCint) | |
| * Prototype for external procedure 'htmlElementAllowedHere'. | |
| * #param1: passed by value, typed like htmlElemDescPtr. | |
| * #param2: null-terminated string pointer by value. | |
| * Returns a value typed like xmlCint. | |
| d htmlElementAllowedHere... | |
| d pr extproc('htmlElementAllowedHere') | |
| d like(xmlCint) | |
| d #param1 value like(htmlElemDescPtr) const | |
| d #param2 * value options(*string) const xmlChar * | |
| * Prototype for external procedure 'htmlElementStatusHere'. | |
| * #param1: passed by value, typed like htmlElemDescPtr. | |
| * #param2: passed by value, typed like htmlElemDescPtr. | |
| * Returns a value typed like htmlStatus. | |
| d htmlElementStatusHere... | |
| d pr extproc('htmlElementStatusHere') | |
| d like(htmlStatus) | |
| d #param1 value like(htmlElemDescPtr) const | |
| d #param2 value like(htmlElemDescPtr) const | |
| * Prototype for external procedure 'htmlNodeStatus'. | |
| * #param1: passed by value, typed like htmlNodePtr. | |
| * #param2: passed by value, typed like xmlCint. | |
| * Returns a value typed like htmlStatus. | |
| d htmlNodeStatus pr extproc('htmlNodeStatus') | |
| d like(htmlStatus) | |
| d #param1 value like(htmlNodePtr) | |
| d #param2 value like(xmlCint) | |
| * C macros implemented as procedures for ILE/RPG support. | |
| * Prototype htmlDefaultSubelement, bound to the external procedure | |
| * '__htmlDefaultSubelement' (note the double-underscore prefix on | |
| * the external name only). | |
| * elt: pointer by value (annotated const htmlElemDesc *). | |
| * Returns a pointer (annotated const char *). | |
| d htmlDefaultSubelement... | |
| d pr * extproc('__htmlDefaultSubelement') const char * | |
| d elt * value const htmlElemDesc * | |
| * Prototype htmlElementAllowedHereDesc, bound to the external | |
| * procedure '__htmlElementAllowedHereDesc' (the EXTPROC keyword is | |
| * continued over two lines). | |
| * parent: pointer by value (annotated const htmlElemDesc *). | |
| * elt: pointer by value (annotated const htmlElemDesc *). | |
| * Returns a value typed like xmlCint. | |
| d htmlElementAllowedHereDesc... | |
| d pr extproc( | |
| d '__htmlElementAllowedHereDesc') | |
| d like(xmlCint) | |
| d parent * value const htmlElemDesc * | |
| d elt * value const htmlElemDesc * | |
| * Prototype htmlRequiredAttrs, bound to the external procedure | |
| * '__htmlRequiredAttrs'. | |
| * elt: pointer by value (annotated const htmlElemDesc *). | |
| * Returns a pointer (annotated const char * *). | |
| d htmlRequiredAttrs... | |
| d pr * extproc('__htmlRequiredAttrs') const char * * | |
| d elt * value const htmlElemDesc * | |
| /endif LIBXML_HTML_ENABLED | |
| /endif HTML_PARSER_H__ |