New set of cleanups, released 2.2.3: - SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c: removed a few warnings in pedantic mode ... - parserInternals.c parser.c: moved encoding switching function to parserInternals.c - configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3 Daniel

commit: 04698d9e1c56467007fcbb9472e5db67cf5938f5 [log] [tgz]
author: Daniel Veillard <veillard@src.gnome.org> Sun Sep 17 16:00:22 2000 +0000
committer: Daniel Veillard <veillard@src.gnome.org> Sun Sep 17 16:00:22 2000 +0000
tree: 0af06eb4821fb8005c67df4cddc4ed5d6936f485
parent: a2c6da94f8962cfe858c839ca8ebb12746c7036b [diff]
diff --git a/ChangeLog b/ChangeLog
index 85a5a9d..455328f 100644
--- a/ChangeLog
+++ b/ChangeLog

@@ -1,3 +1,11 @@
+Sun Sep 17 17:58:37 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+	* SAX.c debugXML.c parser.c parserInternals.c tree.c valid.c xpath.c:
+	  removed a few warnings in pedantic mode ...
+	* parserInternals.c parser.c: moved encoding switching function
+	  to parserInternals.c
+	* configure.in, doc/Makefile.am libxml.spec.in: released 2.2.3
+
 Sat Sep 16 20:12:41 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 
 	* HTMLparser.c parser.c: set ctxt->errNo before calling the

diff --git a/SAX.c b/SAX.c
index 3d1475b..b63ed2d 100644
--- a/SAX.c
+++ b/SAX.c

@@ -312,7 +312,7 @@
 {
     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
     xmlParserInputPtr ret;
-    char *URI;
+    xmlChar *URI;
     const char *base = NULL;
 
     if (ctxt->input != NULL)
@@ -320,7 +320,7 @@
     if (base == NULL)
 	base = ctxt->directory;
 
-    URI = xmlBuildURI(systemId, base);
+    URI = xmlBuildURI(systemId, (const xmlChar *) base);
 
 #ifdef DEBUG_SAX
     fprintf(stderr, "SAX.resolveEntity(%s, %s)\n", publicId, systemId);
@@ -423,7 +423,7 @@
 	    ctxt->sax->warning(ctxt, 
 	     "Entity(%s) already defined in the internal subset\n", name);
 	if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
-	    char *URI;
+	    xmlChar *URI;
 	    const char *base = NULL;
 
 	    if (ctxt->input != NULL)
@@ -431,7 +431,7 @@
 	    if (base == NULL)
 		base = ctxt->directory;
 	
-	    URI = xmlBuildURI(systemId, base);
+	    URI = xmlBuildURI(systemId, (const xmlChar *) base);
 	    ent->URI = URI;
 	}
     } else if (ctxt->inSubset == 2) {
@@ -442,7 +442,7 @@
 	    ctxt->sax->warning(ctxt, 
 	     "Entity(%s) already defined in the external subset\n", name);
 	if ((ent != NULL) && (ent->URI == NULL) && (systemId != NULL)) {
-	    char *URI;
+	    xmlChar *URI;
 	    const char *base = NULL;
 
 	    if (ctxt->input != NULL)
@@ -450,7 +450,7 @@
 	    if (base == NULL)
 		base = ctxt->directory;
 	
-	    URI = xmlBuildURI(systemId, base);
+	    URI = xmlBuildURI(systemId, (const xmlChar *) base);
 	    ent->URI = URI;
 	}
     } else {

diff --git a/config.h.in b/config.h.in
index 7a7587a..01bb475 100644
--- a/config.h.in
+++ b/config.h.in

@@ -27,6 +27,9 @@
 /* Define if you have the fpclass function.  */
 #undef HAVE_FPCLASS
 
+/* Define if you have the iconv function.  */
+#undef HAVE_ICONV
+
 /* Define if you have the isnand function.  */
 #undef HAVE_ISNAND
 
@@ -96,6 +99,9 @@
 /* Define if you have the <stdlib.h> header file.  */
 #undef HAVE_STDLIB_H
 
+/* Define if you have the <string.h> header file.  */
+#undef HAVE_STRING_H
+
 /* Define if you have the <sys/dir.h> header file.  */
 #undef HAVE_SYS_DIR_H
 
@@ -144,3 +150,6 @@
 /* Version number of package */
 #undef VERSION
 
+/* Define if compiler has function prototypes */
+#undef PROTOTYPES
+

diff --git a/configure.in b/configure.in
index 5b7c675..61977a6 100644
--- a/configure.in
+++ b/configure.in

@@ -5,7 +5,7 @@
 
 LIBXML_MAJOR_VERSION=2
 LIBXML_MINOR_VERSION=2
-LIBXML_MICRO_VERSION=2
+LIBXML_MICRO_VERSION=3
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
 
@@ -243,6 +243,7 @@
     echo Disabling ICONV support
     WITH_ICONV=0
 else    
+    AC_CHECK_FUNCS(iconv)
     if test "$have_iconv" != "" ; then
         echo Iconv support not found
         WITH_ICONV=0

diff --git a/debugXML.c b/debugXML.c
index e0c41f9..9ee3298 100644
--- a/debugXML.c
+++ b/debugXML.c

@@ -917,6 +917,9 @@
 	    break;
 	case XML_DOCUMENT_NODE:
 	case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    list = ((xmlDocPtr) node)->children;
 	    break;
 	case XML_ATTRIBUTE_NODE:

diff --git a/doc/Makefile.am b/doc/Makefile.am
index e83c1a5..dcb86ac 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am

@@ -51,6 +51,6 @@
 	-(cd $(DESTDIR); gtkdoc-fixxref --module=$(DOC_MODULE) --html-dir=$(HTML_DIR))
 
 dist-hook:
-	(cd $(srcdir) ; tar cvf - xml.html FAQ.html encoding.html structure.gif DOM.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
+	(cd $(srcdir) ; tar cvf - *.html *.gif html/*.html html/*.sgml) | (cd $(distdir); tar xf -)
 
 .PHONY : html sgml templates scan

diff --git a/libxml.spec.in b/libxml.spec.in
index b28e1e7..82753d8 100644
--- a/libxml.spec.in
+++ b/libxml.spec.in

@@ -92,8 +92,8 @@
 %defattr(-, root, root)
 
 %doc AUTHORS ChangeLog NEWS README COPYING COPYING.LIB TODO
-%doc /usr/man/man1/xmllint.1
-%doc /usr/man/man4/libxml.4
+%doc /usr/man/man1/xmllint.1*
+%doc /usr/man/man4/libxml.4*
 
 %{prefix}/lib/lib*.so.*
 %{prefix}/bin/xmllint

diff --git a/parser.c b/parser.c
index 4b43510..6f97fb0 100644
--- a/parser.c
+++ b/parser.c

@@ -901,311 +901,6 @@
 
 /************************************************************************
  *									*
- *		Commodity functions to handle encodings			*
- *									*
- ************************************************************************/
-
-/**
- * xmlSwitchEncoding:
- * @ctxt:  the parser context
- * @enc:  the encoding value (number)
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
-{
-    xmlCharEncodingHandlerPtr handler;
-
-    switch (enc) {
-	case XML_CHAR_ENCODING_ERROR:
-	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-	    ctxt->wellFormed = 0;
-	    ctxt->disableSAX = 1;
-	    break;
-	case XML_CHAR_ENCODING_NONE:
-	    /* let's assume it's UTF-8 without the XML decl */
-	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-	    return(0);
-	case XML_CHAR_ENCODING_UTF8:
-	    /* default encoding, no conversion should be needed */
-	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-	    return(0);
-	default:
-	    break;
-    }
-    handler = xmlGetCharEncodingHandler(enc);
-    if (handler == NULL) {
-	/*
-	 * Default handlers.
-	 */
-	switch (enc) {
-	    case XML_CHAR_ENCODING_ERROR:
-		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-		ctxt->wellFormed = 0;
-		ctxt->disableSAX = 1;
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		break;
-	    case XML_CHAR_ENCODING_NONE:
-		/* let's assume it's UTF-8 without the XML decl */
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		return(0);
-	    case XML_CHAR_ENCODING_UTF8:
-	    case XML_CHAR_ENCODING_ASCII:
-		/* default encoding, no conversion should be needed */
-		ctxt->charset = XML_CHAR_ENCODING_UTF8;
-		return(0);
-	    case XML_CHAR_ENCODING_UTF16LE:
-		break;
-	    case XML_CHAR_ENCODING_UTF16BE:
-		break;
-	    case XML_CHAR_ENCODING_UCS4LE:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding USC4 little endian not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4BE:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding USC4 big endian not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_EBCDIC:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding EBCDIC not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4_2143:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS4 2143 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS4_3412:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS4 3412 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_UCS2:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding UCS2 not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_8859_1:
-	    case XML_CHAR_ENCODING_8859_2:
-	    case XML_CHAR_ENCODING_8859_3:
-	    case XML_CHAR_ENCODING_8859_4:
-	    case XML_CHAR_ENCODING_8859_5:
-	    case XML_CHAR_ENCODING_8859_6:
-	    case XML_CHAR_ENCODING_8859_7:
-	    case XML_CHAR_ENCODING_8859_8:
-	    case XML_CHAR_ENCODING_8859_9:
-		/*
-		 * We used to keep the internal content in the
-		 * document encoding however this turns being unmaintainable
-		 * So xmlGetCharEncodingHandler() will return non-null
-		 * values for this now.
-		 */
-		if ((ctxt->inputNr == 1) &&
-		    (ctxt->encoding == NULL) &&
-		    (ctxt->input->encoding != NULL)) {
-		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
-		}
-		ctxt->charset = enc;
-		return(0);
-	    case XML_CHAR_ENCODING_2022_JP:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding ISO-2022-JPnot supported\n");
-		break;
-	    case XML_CHAR_ENCODING_SHIFT_JIS:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding Shift_JIS not supported\n");
-		break;
-	    case XML_CHAR_ENCODING_EUC_JP:
-		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		      "char encoding EUC-JPnot supported\n");
-		break;
-	}
-    }
-    if (handler == NULL)
-	return(-1);
-    ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    return(xmlSwitchToEncoding(ctxt, handler));
-}
-
-/**
- * xmlSwitchToEncoding:
- * @ctxt:  the parser context
- * @handler:  the encoding handler
- *
- * change the input functions when discovering the character encoding
- * of a given entity.
- *
- * Returns 0 in case of success, -1 otherwise
- */
-int
-xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
-{
-    int nbchars;
-
-    if (handler != NULL) {
-        if (ctxt->input != NULL) {
-	    if (ctxt->input->buf != NULL) {
-	        if (ctxt->input->buf->encoder != NULL) {
-		    if (ctxt->input->buf->encoder == handler)
-			return(0);
-		    /*
-		     * Note: this is a bit dangerous, but that's what it
-		     * takes to use nearly compatible signature for different
-		     * encodings.
-		     */
-		    xmlCharEncCloseFunc(ctxt->input->buf->encoder);
-		    ctxt->input->buf->encoder = handler;
-		    return(0);
-		}
-		ctxt->input->buf->encoder = handler;
-
-	        /*
-		 * Is there already some content down the pipe to convert ?
-		 */
-	        if ((ctxt->input->buf->buffer != NULL) &&
-		    (ctxt->input->buf->buffer->use > 0)) {
-		    int processed;
-
-		    /*
-		     * Specific handling of the Byte Order Mark for 
-		     * UTF-16
-		     */
-		    if ((handler->name != NULL) &&
-			(!strcmp(handler->name, "UTF-16LE")) && 
-		        (ctxt->input->cur[0] == 0xFF) &&
-		        (ctxt->input->cur[1] == 0xFE)) {
-			ctxt->input->cur += 2;
-		    }
-		    if ((handler->name != NULL) &&
-			(!strcmp(handler->name, "UTF-16BE")) && 
-		        (ctxt->input->cur[0] == 0xFE) &&
-		        (ctxt->input->cur[1] == 0xFF)) {
-			ctxt->input->cur += 2;
-		    }
-
-		    /*
-		     * Shring the current input buffer.
-		     * Move it as the raw buffer and create a new input buffer
-		     */
-		    processed = ctxt->input->cur - ctxt->input->base;
-		    xmlBufferShrink(ctxt->input->buf->buffer, processed);
-		    ctxt->input->buf->raw = ctxt->input->buf->buffer;
-		    ctxt->input->buf->buffer = xmlBufferCreate();
-
-		    if (ctxt->html) {
-			/*
-			 * converst as much as possbile of the buffer
-			 */
-			nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
-				                   ctxt->input->buf->buffer,
-						   ctxt->input->buf->raw);
-		    } else {
-			/*
-			 * convert just enough to get
-			 * '<?xml version="1.0" encoding="xxx"?>'
-			 * parsed with the autodetected encoding
-			 * into the parser reading buffer.
-			 */
-			nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
-						      ctxt->input->buf->buffer,
-						      ctxt->input->buf->raw);
-		    }
-		    if (nbchars < 0) {
-			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
-			return(-1);
-		    }
-		    ctxt->input->base =
-		    ctxt->input->cur = ctxt->input->buf->buffer->content;
-
-		}
-		return(0);
-	    } else {
-	        if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
-		    /*
-		     * When parsing a static memory array one must know the
-		     * size to be able to convert the buffer.
-		     */
-		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			ctxt->sax->error(ctxt->userData,
-					 "xmlSwitchEncoding : no input\n");
-		    return(-1);
-		} else {
-		    int processed;
-
-		    /*
-		     * Shring the current input buffer.
-		     * Move it as the raw buffer and create a new input buffer
-		     */
-		    processed = ctxt->input->cur - ctxt->input->base;
-
-		    ctxt->input->buf->raw = xmlBufferCreate();
-		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
-				 ctxt->input->length - processed);
-		    ctxt->input->buf->buffer = xmlBufferCreate();
-
-		    /*
-		     * convert as much as possible of the raw input
-		     * to the parser reading buffer.
-		     */
-		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
-		                               ctxt->input->buf->buffer,
-					       ctxt->input->buf->raw);
-		    if (nbchars < 0) {
-			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
-			return(-1);
-		    }
-
-		    /*
-		     * Conversion succeeded, get rid of the old buffer
-		     */
-		    if ((ctxt->input->free != NULL) &&
-		        (ctxt->input->base != NULL))
-			ctxt->input->free((xmlChar *) ctxt->input->base);
-		    ctxt->input->base =
-		    ctxt->input->cur = ctxt->input->buf->buffer->content;
-		}
-	    }
-	} else {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-	        ctxt->sax->error(ctxt->userData,
-		                 "xmlSwitchEncoding : no input\n");
-	    return(-1);
-	}
-	/*
-	 * The parsing is now done in UTF8 natively
-	 */
-	ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    } else 
-	return(-1);
-    return(0);
-
-}
-
-/************************************************************************
- *									*
  *		Commodity functions to handle xmlChars			*
  *									*
  ************************************************************************/

diff --git a/parserInternals.c b/parserInternals.c
index c20b90a..de757aa 100644
--- a/parserInternals.c
+++ b/parserInternals.c

@@ -1496,6 +1496,311 @@
 
 /************************************************************************
  *									*
+ *		Commodity functions to switch encodings			*
+ *									*
+ ************************************************************************/
+
+/**
+ * xmlSwitchEncoding:
+ * @ctxt:  the parser context
+ * @enc:  the encoding value (number)
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
+{
+    xmlCharEncodingHandlerPtr handler;
+
+    switch (enc) {
+	case XML_CHAR_ENCODING_ERROR:
+	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+	    ctxt->wellFormed = 0;
+	    ctxt->disableSAX = 1;
+	    break;
+	case XML_CHAR_ENCODING_NONE:
+	    /* let's assume it's UTF-8 without the XML decl */
+	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+	    return(0);
+	case XML_CHAR_ENCODING_UTF8:
+	    /* default encoding, no conversion should be needed */
+	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+	    return(0);
+	default:
+	    break;
+    }
+    handler = xmlGetCharEncodingHandler(enc);
+    if (handler == NULL) {
+	/*
+	 * Default handlers.
+	 */
+	switch (enc) {
+	    case XML_CHAR_ENCODING_ERROR:
+		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+		ctxt->wellFormed = 0;
+		ctxt->disableSAX = 1;
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		break;
+	    case XML_CHAR_ENCODING_NONE:
+		/* let's assume it's UTF-8 without the XML decl */
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		return(0);
+	    case XML_CHAR_ENCODING_UTF8:
+	    case XML_CHAR_ENCODING_ASCII:
+		/* default encoding, no conversion should be needed */
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		return(0);
+	    case XML_CHAR_ENCODING_UTF16LE:
+		break;
+	    case XML_CHAR_ENCODING_UTF16BE:
+		break;
+	    case XML_CHAR_ENCODING_UCS4LE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding USC4 little endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4BE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding USC4 big endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EBCDIC:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EBCDIC not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_2143:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 2143 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_3412:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 3412 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS2:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS2 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_8859_1:
+	    case XML_CHAR_ENCODING_8859_2:
+	    case XML_CHAR_ENCODING_8859_3:
+	    case XML_CHAR_ENCODING_8859_4:
+	    case XML_CHAR_ENCODING_8859_5:
+	    case XML_CHAR_ENCODING_8859_6:
+	    case XML_CHAR_ENCODING_8859_7:
+	    case XML_CHAR_ENCODING_8859_8:
+	    case XML_CHAR_ENCODING_8859_9:
+		/*
+		 * We used to keep the internal content in the
+		 * document encoding however this turns being unmaintainable
+		 * So xmlGetCharEncodingHandler() will return non-null
+		 * values for this now.
+		 */
+		if ((ctxt->inputNr == 1) &&
+		    (ctxt->encoding == NULL) &&
+		    (ctxt->input->encoding != NULL)) {
+		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
+		}
+		ctxt->charset = enc;
+		return(0);
+	    case XML_CHAR_ENCODING_2022_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding ISO-2022-JPnot supported\n");
+		break;
+	    case XML_CHAR_ENCODING_SHIFT_JIS:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding Shift_JIS not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EUC_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EUC-JPnot supported\n");
+		break;
+	}
+    }
+    if (handler == NULL)
+	return(-1);
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    return(xmlSwitchToEncoding(ctxt, handler));
+}
+
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
+{
+    int nbchars;
+
+    if (handler != NULL) {
+        if (ctxt->input != NULL) {
+	    if (ctxt->input->buf != NULL) {
+	        if (ctxt->input->buf->encoder != NULL) {
+		    if (ctxt->input->buf->encoder == handler)
+			return(0);
+		    /*
+		     * Note: this is a bit dangerous, but that's what it
+		     * takes to use nearly compatible signature for different
+		     * encodings.
+		     */
+		    xmlCharEncCloseFunc(ctxt->input->buf->encoder);
+		    ctxt->input->buf->encoder = handler;
+		    return(0);
+		}
+		ctxt->input->buf->encoder = handler;
+
+	        /*
+		 * Is there already some content down the pipe to convert ?
+		 */
+	        if ((ctxt->input->buf->buffer != NULL) &&
+		    (ctxt->input->buf->buffer->use > 0)) {
+		    int processed;
+
+		    /*
+		     * Specific handling of the Byte Order Mark for 
+		     * UTF-16
+		     */
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16LE")) && 
+		        (ctxt->input->cur[0] == 0xFF) &&
+		        (ctxt->input->cur[1] == 0xFE)) {
+			ctxt->input->cur += 2;
+		    }
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16BE")) && 
+		        (ctxt->input->cur[0] == 0xFE) &&
+		        (ctxt->input->cur[1] == 0xFF)) {
+			ctxt->input->cur += 2;
+		    }
+
+		    /*
+		     * Shring the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
+		     */
+		    processed = ctxt->input->cur - ctxt->input->base;
+		    xmlBufferShrink(ctxt->input->buf->buffer, processed);
+		    ctxt->input->buf->raw = ctxt->input->buf->buffer;
+		    ctxt->input->buf->buffer = xmlBufferCreate();
+
+		    if (ctxt->html) {
+			/*
+			 * converst as much as possbile of the buffer
+			 */
+			nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+				                   ctxt->input->buf->buffer,
+						   ctxt->input->buf->raw);
+		    } else {
+			/*
+			 * convert just enough to get
+			 * '<?xml version="1.0" encoding="xxx"?>'
+			 * parsed with the autodetected encoding
+			 * into the parser reading buffer.
+			 */
+			nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
+						      ctxt->input->buf->buffer,
+						      ctxt->input->buf->raw);
+		    }
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
+		    }
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
+
+		}
+		return(0);
+	    } else {
+	        if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
+		    /*
+		     * When parsing a static memory array one must know the
+		     * size to be able to convert the buffer.
+		     */
+		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+			ctxt->sax->error(ctxt->userData,
+					 "xmlSwitchEncoding : no input\n");
+		    return(-1);
+		} else {
+		    int processed;
+
+		    /*
+		     * Shring the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
+		     */
+		    processed = ctxt->input->cur - ctxt->input->base;
+
+		    ctxt->input->buf->raw = xmlBufferCreate();
+		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
+				 ctxt->input->length - processed);
+		    ctxt->input->buf->buffer = xmlBufferCreate();
+
+		    /*
+		     * convert as much as possible of the raw input
+		     * to the parser reading buffer.
+		     */
+		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+		                               ctxt->input->buf->buffer,
+					       ctxt->input->buf->raw);
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
+		    }
+
+		    /*
+		     * Conversion succeeded, get rid of the old buffer
+		     */
+		    if ((ctxt->input->free != NULL) &&
+		        (ctxt->input->base != NULL))
+			ctxt->input->free((xmlChar *) ctxt->input->base);
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
+		}
+	    }
+	} else {
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	        ctxt->sax->error(ctxt->userData,
+		                 "xmlSwitchEncoding : no input\n");
+	    return(-1);
+	}
+	/*
+	 * The parsing is now done in UTF8 natively
+	 */
+	ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    } else 
+	return(-1);
+    return(0);
+
+}
+
+/************************************************************************
+ *									*
  *	Commodity functions to handle entities processing		*
  *									*
  ************************************************************************/
@@ -1705,7 +2010,7 @@
 	return(NULL);
 
     URI = xmlStrdup((xmlChar *) filename);
-    directory = xmlParserGetDirectory(URI);
+    directory = xmlParserGetDirectory((const char *) URI);
 
     inputStream = xmlNewInputStream(ctxt);
     if (inputStream == NULL) {
@@ -1714,7 +2019,7 @@
 	return(NULL);
     }
 
-    inputStream->filename = URI;
+    inputStream->filename = (const char *) URI;
     inputStream->directory = directory;
     inputStream->buf = buf;
 

diff --git a/tree.c b/tree.c
index 4366336..5174134 100644
--- a/tree.c
+++ b/tree.c

@@ -2637,6 +2637,9 @@
         case XML_PI_NODE:
         case XML_ENTITY_REF_NODE:
         case XML_ENTITY_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return;
         case XML_ELEMENT_NODE:
         case XML_ATTRIBUTE_NODE:
@@ -2719,6 +2722,9 @@
         case XML_DOCUMENT_FRAG_NODE:
         case XML_NOTATION_NODE:
         case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return;
         case XML_ELEMENT_NODE:
         case XML_ATTRIBUTE_NODE:
@@ -2845,6 +2851,9 @@
         case XML_DOCUMENT_TYPE_NODE:
         case XML_NOTATION_NODE:
         case XML_DTD_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return(NULL);
         case XML_ELEMENT_DECL:
 	    /* TODO !!! */
@@ -2930,6 +2939,9 @@
         case XML_DOCUMENT_NODE:
         case XML_HTML_DOCUMENT_NODE:
         case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
         case XML_NOTATION_NODE:
 	    break;
@@ -3012,6 +3024,9 @@
         case XML_DTD_NODE:
         case XML_HTML_DOCUMENT_NODE:
         case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
         case XML_ELEMENT_DECL:
 	    /* TODO !!! */
@@ -3096,6 +3111,9 @@
         case XML_DTD_NODE:
         case XML_HTML_DOCUMENT_NODE:
         case XML_DOCUMENT_TYPE_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    break;
         case XML_ELEMENT_DECL:
         case XML_ATTRIBUTE_DECL:

diff --git a/valid.c b/valid.c
index 3e38e3a..f2fc71d 100644
--- a/valid.c
+++ b/valid.c

@@ -3561,6 +3561,9 @@
 		 break;
             case XML_ATTRIBUTE_NODE:
             case XML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
 	    case XML_HTML_DOCUMENT_NODE:
             case XML_DOCUMENT_TYPE_NODE:
             case XML_DOCUMENT_FRAG_NODE:

diff --git a/xpath.c b/xpath.c
index 433bb56..03b5392 100644
--- a/xpath.c
+++ b/xpath.c

@@ -1504,6 +1504,9 @@
             case XML_DOCUMENT_TYPE_NODE:
             case XML_DOCUMENT_FRAG_NODE:
             case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
 		return(((xmlDocPtr) ctxt->context->node)->children);
 	    case XML_ELEMENT_DECL:
 	    case XML_ATTRIBUTE_DECL:
@@ -1632,6 +1635,9 @@
             case XML_DOCUMENT_TYPE_NODE:
             case XML_DOCUMENT_FRAG_NODE:
             case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
                 return(NULL);
 	}
     }
@@ -1686,6 +1692,9 @@
             case XML_DOCUMENT_TYPE_NODE:
             case XML_DOCUMENT_FRAG_NODE:
             case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
                 return(NULL);
 	}
 	return(NULL);
@@ -1717,6 +1726,9 @@
 	case XML_DOCUMENT_TYPE_NODE:
 	case XML_DOCUMENT_FRAG_NODE:
 	case XML_HTML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	case XML_SGML_DOCUMENT_NODE:
+#endif
 	    return(NULL);
     }
     return(NULL);
commit	04698d9e1c56467007fcbb9472e5db67cf5938f5	[log] [tgz]
author	Daniel Veillard <veillard@src.gnome.org>	Sun Sep 17 16:00:22 2000 +0000
committer	Daniel Veillard <veillard@src.gnome.org>	Sun Sep 17 16:00:22 2000 +0000
tree	0af06eb4821fb8005c67df4cddc4ed5d6936f485
parent	a2c6da94f8962cfe858c839ca8ebb12746c7036b [diff]