encoding: Support input-only and output-only converters
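Make it possible to open an encoding handler only for input or output.
This avoids the creation of unnecessary converters.

The boolean `output` argument of xmlCreateCharEncodingHandler and of the
xmlCharEncConvImpl callback is replaced by an xmlCharEncFlags bit mask
(XML_ENC_INPUT, XML_ENC_OUTPUT, or both). An empty mask is rejected with
XML_ERR_ARGUMENT. xmlOpenCharEncodingHandler keeps its boolean argument
and maps it to a single flag.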
Should also fix #863.
diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml
index a605835..01e5fe9 100644
--- a/doc/libxml2-api.xml
+++ b/doc/libxml2-api.xml
@@ -349,7 +349,10 @@
<exports symbol='XML_ENC_ERR_MEMORY' type='enum'/>
<exports symbol='XML_ENC_ERR_SPACE' type='enum'/>
<exports symbol='XML_ENC_ERR_SUCCESS' type='enum'/>
+ <exports symbol='XML_ENC_INPUT' type='enum'/>
+ <exports symbol='XML_ENC_OUTPUT' type='enum'/>
<exports symbol='xmlCharEncError' type='typedef'/>
+ <exports symbol='xmlCharEncFlags' type='typedef'/>
<exports symbol='xmlCharEncoding' type='typedef'/>
<exports symbol='xmlCharEncodingHandler' type='typedef'/>
<exports symbol='xmlCharEncodingHandlerPtr' type='typedef'/>
@@ -4528,6 +4531,8 @@
<enum name='XML_ENC_ERR_MEMORY' file='encoding' value='-4' type='xmlCharEncError'/>
<enum name='XML_ENC_ERR_SPACE' file='encoding' value='-3' type='xmlCharEncError'/>
<enum name='XML_ENC_ERR_SUCCESS' file='encoding' value='0' type='xmlCharEncError'/>
+ <enum name='XML_ENC_INPUT' file='encoding' value='1' type='xmlCharEncFlags'/>
+ <enum name='XML_ENC_OUTPUT' file='encoding' value='2' type='xmlCharEncFlags'/>
<enum name='XML_ENTITY_DECL' file='tree' value='17' type='xmlElementType'/>
<enum name='XML_ENTITY_NODE' file='tree' value='6' type='xmlElementType' info='unused'/>
<enum name='XML_ENTITY_REF_NODE' file='tree' value='5' type='xmlElementType'/>
@@ -5721,6 +5726,7 @@
<info>This is a basic byte in an UTF-8 encoded string. It's unsigned allowing to pinpoint case where char * are assigned to xmlChar * (possibly making serialization back impossible).</info>
</typedef>
<typedef name='xmlCharEncError' file='encoding' type='enum'/>
+ <typedef name='xmlCharEncFlags' file='encoding' type='enum'/>
<typedef name='xmlCharEncoding' file='encoding' type='enum'/>
<struct name='xmlCharEncodingHandler' file='encoding' type='struct _xmlCharEncodingHandler'>
<field name='name' type='char *'/>
@@ -8117,11 +8123,11 @@
<arg name='flush' type='int' info='end of input'/>
</functype>
<functype name='xmlCharEncConvImpl' file='encoding' module='encoding'>
- <info>If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler.</info>
+ <info>If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler. @flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.</info>
<return type='int' info='an xmlParserErrors code.'/>
<arg name='vctxt' type='void *' info='user data'/>
<arg name='name' type='const char *' info='encoding name'/>
- <arg name='output' type='int' info='true if output encoding, false if input'/>
+ <arg name='flags' type='xmlCharEncFlags' info='bit mask of flags'/>
<arg name='out' type='xmlCharEncodingHandler **' info='pointer to resulting handler'/>
</functype>
<function name='xmlCharEncFirstLine' file='encoding' module='encoding'>
@@ -8368,10 +8374,10 @@
<arg name='cur' type='xmlAttrPtr' info='the first attribute'/>
</function>
<function name='xmlCreateCharEncodingHandler' file='encoding' module='encoding'>
- <info>Find or create a handler matching the encoding. The following converters are looked up in order: - Built-in handler (UTF-8, UTF-16, ISO-8859-1, ASCII) - Custom implementation if provided - User-registered global handler (deprecated) - iconv if enabled - ICU if enabled The handler must be closed with xmlCharEncCloseFunc. If the encoding is UTF-8, a NULL handler and no error code will be returned. Available since 2.14.0.</info>
+ <info>Find or create a handler matching the encoding. The following converters are looked up in order: - Built-in handler (UTF-8, UTF-16, ISO-8859-1, ASCII) - Custom implementation if provided - User-registered global handler (deprecated) - iconv if enabled - ICU if enabled The handler must be closed with xmlCharEncCloseFunc. If the encoding is UTF-8, a NULL handler and no error code will be returned. @flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both. Available since 2.14.0.</info>
<return type='int' info='XML_ERR_OK, XML_ERR_UNSUPPORTED_ENCODING or another xmlParserErrors error code.'/>
<arg name='name' type='const char *' info='a string describing the char encoding.'/>
- <arg name='output' type='int' info='boolean, use handler for output'/>
+ <arg name='flags' type='xmlCharEncFlags' info='bit mask of flags'/>
<arg name='impl' type='xmlCharEncConvImpl' info='a conversion implementation (optional)'/>
<arg name='implCtxt' type='void *' info='user data for conversion implementation (optional)'/>
<arg name='out' type='xmlCharEncodingHandler **' info='pointer to result'/>
diff --git a/encoding.c b/encoding.c
index f172c68..be9ce66 100644
--- a/encoding.c
+++ b/encoding.c
@@ -239,12 +239,14 @@
#ifdef LIBXML_ICONV_ENABLED
static int
-xmlCharEncIconv(const char *name, xmlCharEncodingHandler **out);
+xmlCharEncIconv(const char *name, xmlCharEncFlags flags,
+ xmlCharEncodingHandler **out);
#endif
#ifdef LIBXML_ICU_ENABLED
static int
-xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out);
+xmlCharEncUconv(const char *name, xmlCharEncFlags flags,
+ xmlCharEncodingHandler **out);
#endif
/************************************************************************
@@ -794,7 +796,7 @@
* xmlFindExtraHandler:
* @norig: name of the char encoding
* @name: potentially aliased name of the encoding
- * @output: boolean, use handler for output
+ * @flags: bit mask of XML_ENC_INPUT and/or XML_ENC_OUTPUT
* @impl: a conversion implementation (optional)
* @implCtxt: user data for conversion implementation (optional)
* @out: pointer to resulting handler
@@ -804,7 +806,7 @@
* Returns an xmlParserErrors error code.
*/
static int
-xmlFindExtraHandler(const char *norig, const char *name, int output,
+xmlFindExtraHandler(const char *norig, const char *name, xmlCharEncFlags flags,
xmlCharEncConvImpl impl, void *implCtxt,
xmlCharEncodingHandler **out) {
/*
@@ -814,7 +816,7 @@
* alias resolution.
*/
if (impl != NULL)
- return(impl(implCtxt, norig, output, out));
+ return(impl(implCtxt, norig, flags, out));
/*
* Deprecated
@@ -827,7 +829,8 @@
if (!xmlStrcasecmp((const xmlChar *) name,
(const xmlChar *) h->name)) {
- if ((output ? h->output.func : h->input.func) != NULL) {
+ if ((((flags & XML_ENC_INPUT) == 0) || (h->input.func)) &&
+ (((flags & XML_ENC_OUTPUT) == 0) || (h->output.func))) {
*out = h;
return(XML_ERR_OK);
}
@@ -837,7 +840,7 @@
#ifdef LIBXML_ICONV_ENABLED
{
- int ret = xmlCharEncIconv(name, out);
+ int ret = xmlCharEncIconv(name, flags, out);
if (ret == XML_ERR_OK)
return(XML_ERR_OK);
@@ -848,7 +851,7 @@
#ifdef LIBXML_ICU_ENABLED
{
- int ret = xmlCharEncUconv(name, out);
+ int ret = xmlCharEncUconv(name, flags, out);
if (ret == XML_ERR_OK)
return(XML_ERR_OK);
@@ -906,9 +909,15 @@
return(XML_ERR_OK);
}
- if (handler->name != NULL)
- return(xmlFindExtraHandler(handler->name, handler->name, 0,
+ if (handler->name != NULL) {
+ xmlCharEncFlags flags = XML_ENC_INPUT;
+
+#ifdef LIBXML_OUTPUT_ENABLED
+ flags |= XML_ENC_OUTPUT;
+#endif
+ return(xmlFindExtraHandler(handler->name, handler->name, flags,
NULL, NULL, out));
+ }
return(XML_ERR_UNSUPPORTED_ENCODING);
}
@@ -934,7 +943,7 @@
/**
* xmlCreateCharEncodingHandler:
* @name: a string describing the char encoding.
- * @output: boolean, use handler for output
+ * @flags: bit mask of flags
* @impl: a conversion implementation (optional)
* @implCtxt: user data for conversion implementation (optional)
* @out: pointer to result
@@ -953,13 +962,15 @@
* If the encoding is UTF-8, a NULL handler and no error code will
* be returned.
*
+ * @flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.
+ *
* Available since 2.14.0.
*
* Returns XML_ERR_OK, XML_ERR_UNSUPPORTED_ENCODING or another
* xmlParserErrors error code.
*/
int
-xmlCreateCharEncodingHandler(const char *name, int output,
+xmlCreateCharEncodingHandler(const char *name, xmlCharEncFlags flags,
xmlCharEncConvImpl impl, void *implCtxt,
xmlCharEncodingHandler **out) {
const xmlCharEncodingHandler *handler;
@@ -970,7 +981,7 @@
return(XML_ERR_ARGUMENT);
*out = NULL;
- if (name == NULL)
+ if ((name == NULL) || (flags == 0))
return(XML_ERR_ARGUMENT);
norig = name;
@@ -986,13 +997,14 @@
if ((enc > 0) && ((size_t) enc < NUM_DEFAULT_HANDLERS)) {
handler = &defaultHandlers[enc];
- if ((output ? handler->output.func : handler->input.func) != NULL) {
+ if ((((flags & XML_ENC_INPUT) == 0) || (handler->input.func)) &&
+ (((flags & XML_ENC_OUTPUT) == 0) || (handler->output.func))) {
*out = (xmlCharEncodingHandler *) handler;
return(XML_ERR_OK);
}
}
- return(xmlFindExtraHandler(norig, name, output, impl, implCtxt, out));
+ return(xmlFindExtraHandler(norig, name, flags, impl, implCtxt, out));
}
/**
@@ -1022,7 +1034,9 @@
int
xmlOpenCharEncodingHandler(const char *name, int output,
xmlCharEncodingHandler **out) {
- return(xmlCreateCharEncodingHandler(name, output, NULL, NULL, out));
+ xmlCharEncFlags flags = output ? XML_ENC_OUTPUT : XML_ENC_INPUT;
+
+ return(xmlCreateCharEncodingHandler(name, flags, NULL, NULL, out));
}
/**
@@ -1041,6 +1055,7 @@
xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char *name) {
xmlCharEncodingHandler *ret;
+ xmlCharEncFlags flags;
/*
* This handler shouldn't be used, but we must return a non-NULL
@@ -1051,7 +1066,11 @@
return((xmlCharEncodingHandlerPtr)
&defaultHandlers[XML_CHAR_ENCODING_UTF8]);
- xmlOpenCharEncodingHandler(name, 0, &ret);
+ flags = XML_ENC_INPUT;
+#ifdef LIBXML_OUTPUT_ENABLED
+ flags |= XML_ENC_OUTPUT;
+#endif
+ xmlCreateCharEncodingHandler(name, flags, NULL, NULL, &ret);
return(ret);
}
@@ -1131,6 +1150,9 @@
xmlIconvFree(void *vctxt) {
xmlIconvCtxt *ctxt = vctxt;
+ if (ctxt == NULL)
+ return;
+
if (ctxt->cd != (iconv_t) -1)
iconv_close(ctxt->cd);
@@ -1164,7 +1186,9 @@
#endif /* FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */
static int
-xmlCharEncIconv(const char *name, xmlCharEncodingHandler **out) {
+xmlCharEncIconv(const char *name, xmlCharEncFlags flags,
+ xmlCharEncodingHandler **out) {
+ xmlCharEncConvFunc inFunc = NULL, outFunc = NULL;
xmlIconvCtxt *inputCtxt = NULL, *outputCtxt = NULL;
iconv_t icv_in;
iconv_t icv_out;
@@ -1217,47 +1241,54 @@
}
#endif
- inputCtxt = xmlMalloc(sizeof(xmlIconvCtxt));
- if (inputCtxt == NULL) {
- ret = XML_ERR_NO_MEMORY;
- goto error;
- }
- inputCtxt->cd = (iconv_t) -1;
-
- icv_in = iconv_open("UTF-8", name);
- if (icv_in == (iconv_t) -1) {
- if (errno == EINVAL)
- ret = XML_ERR_UNSUPPORTED_ENCODING;
- else if (errno == ENOMEM)
+ if (flags & XML_ENC_INPUT) {
+ inputCtxt = xmlMalloc(sizeof(xmlIconvCtxt));
+ if (inputCtxt == NULL) {
ret = XML_ERR_NO_MEMORY;
- else
- ret = XML_ERR_SYSTEM;
- goto error;
- }
- inputCtxt->cd = icv_in;
+ goto error;
+ }
+ inputCtxt->cd = (iconv_t) -1;
- outputCtxt = xmlMalloc(sizeof(xmlIconvCtxt));
- if (outputCtxt == NULL) {
- ret = XML_ERR_NO_MEMORY;
- goto error;
- }
- outputCtxt->cd = (iconv_t) -1;
+ icv_in = iconv_open("UTF-8", name);
+ if (icv_in == (iconv_t) -1) {
+ if (errno == EINVAL)
+ ret = XML_ERR_UNSUPPORTED_ENCODING;
+ else if (errno == ENOMEM)
+ ret = XML_ERR_NO_MEMORY;
+ else
+ ret = XML_ERR_SYSTEM;
+ goto error;
+ }
+ inputCtxt->cd = icv_in;
- icv_out = iconv_open(name, "UTF-8");
- if (icv_out == (iconv_t) -1) {
- if (errno == EINVAL)
- ret = XML_ERR_UNSUPPORTED_ENCODING;
- else if (errno == ENOMEM)
+ inFunc = xmlIconvConvert;
+ }
+
+ if (flags & XML_ENC_OUTPUT) {
+ outputCtxt = xmlMalloc(sizeof(xmlIconvCtxt));
+ if (outputCtxt == NULL) {
ret = XML_ERR_NO_MEMORY;
- else
- ret = XML_ERR_SYSTEM;
- goto error;
- }
- outputCtxt->cd = icv_out;
+ goto error;
+ }
+ outputCtxt->cd = (iconv_t) -1;
- return(xmlCharEncNewCustomHandler(name, xmlIconvConvert, xmlIconvConvert,
- xmlIconvFree, inputCtxt, outputCtxt,
- out));
+ icv_out = iconv_open(name, "UTF-8");
+ if (icv_out == (iconv_t) -1) {
+ if (errno == EINVAL)
+ ret = XML_ERR_UNSUPPORTED_ENCODING;
+ else if (errno == ENOMEM)
+ ret = XML_ERR_NO_MEMORY;
+ else
+ ret = XML_ERR_SYSTEM;
+ goto error;
+ }
+ outputCtxt->cd = icv_out;
+
+ outFunc = xmlIconvConvert;
+ }
+
+ return(xmlCharEncNewCustomHandler(name, inFunc, outFunc, xmlIconvFree,
+ inputCtxt, outputCtxt, out));
error:
if (inputCtxt != NULL)
@@ -1444,21 +1475,29 @@
}
static int
-xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out) {
+xmlCharEncUconv(const char *name, xmlCharEncFlags flags,
+ xmlCharEncodingHandler **out) {
+ xmlCharEncConvFunc inFunc = NULL, outFunc = NULL;
xmlUconvCtxt *ucv_in = NULL;
xmlUconvCtxt *ucv_out = NULL;
int ret;
- ret = openIcuConverter(name, 1, &ucv_in);
- if (ret != 0)
- goto error;
- ret = openIcuConverter(name, 0, &ucv_out);
- if (ret != 0)
- goto error;
+ if (flags & XML_ENC_INPUT) {
+ ret = openIcuConverter(name, 1, &ucv_in);
+ if (ret != 0)
+ goto error;
+ inFunc = xmlUconvConvert;
+ }
- return(xmlCharEncNewCustomHandler(name, xmlUconvConvert, xmlUconvConvert,
- xmlUconvFree, ucv_in, ucv_out,
- out));
+ if (flags & XML_ENC_OUTPUT) {
+ ret = openIcuConverter(name, 0, &ucv_out);
+ if (ret != 0)
+ goto error;
+ outFunc = xmlUconvConvert;
+ }
+
+ return(xmlCharEncNewCustomHandler(name, inFunc, outFunc, xmlUconvFree,
+ ucv_in, ucv_out, out));
error:
if (ucv_in != NULL)
diff --git a/example/icu.c b/example/icu.c
index 59105e9..6f6f340 100644
--- a/example/icu.c
+++ b/example/icu.c
@@ -171,22 +171,29 @@
}
static int
-icuConvImpl(void *vctxt, const char *name, int output,
- xmlCharEncodingHandler **out) {
+icuConvImpl(void *vctxt, const char *name, xmlCharEncFlags flags,
+ xmlCharEncodingHandler **result) {
+ xmlCharEncConvFunc inFunc = NULL, outFunc = NULL;
myConvCtxt *inputCtxt = NULL;
myConvCtxt *outputCtxt = NULL;
int ret;
- ret = icuOpen(name, 1, &inputCtxt);
- if (ret != 0)
- goto error;
- ret = icuOpen(name, 0, &outputCtxt);
- if (ret != 0)
- goto error;
+ if (flags & XML_ENC_INPUT) {
+ ret = icuOpen(name, 1, &inputCtxt);
+ if (ret != 0)
+ goto error;
+ inFunc = icuConvert;
+ }
- return xmlCharEncNewCustomHandler(name, icuConvert, icuConvert,
- icuConvCtxtDtor, inputCtxt, outputCtxt,
- out);
+ if (flags & XML_ENC_OUTPUT) {
+ ret = icuOpen(name, 0, &outputCtxt);
+ if (ret != 0)
+ goto error;
+ outFunc = icuConvert;
+ }
+
+ return xmlCharEncNewCustomHandler(name, inFunc, outFunc, icuConvCtxtDtor,
+ inputCtxt, outputCtxt, result);
error:
if (inputCtxt != NULL)
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 03c67b1..7b86260 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -83,6 +83,11 @@
XML_CHAR_ENCODING_8859_16= 30 /* ISO-8859-16 */
} xmlCharEncoding;
+typedef enum {
+ XML_ENC_INPUT = (1 << 0),
+ XML_ENC_OUTPUT = (1 << 1)
+} xmlCharEncFlags;
+
/**
* xmlCharEncodingInputFunc:
* @out: a pointer to an array of bytes to store the UTF-8 result
@@ -179,17 +184,19 @@
* xmlCharEncConvImpl:
* @vctxt: user data
* @name: encoding name
- * @output: true if output encoding, false if input
+ * @flags: bit mask of flags
* @out: pointer to resulting handler
*
* If this function returns XML_ERR_OK, it must fill the @out
* pointer with an encoding handler. The handler can be obtained
* from xmlCharEncNewCustomHandler.
*
+ * @flags can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.
+ *
* Returns an xmlParserErrors code.
*/
typedef int
-(*xmlCharEncConvImpl)(void *vctxt, const char *name, int output,
+(*xmlCharEncConvImpl)(void *vctxt, const char *name, xmlCharEncFlags flags,
xmlCharEncodingHandler **out);
/*
@@ -212,9 +219,9 @@
xmlCharEncodingHandlerPtr *out);
XMLPUBFUN int
xmlCreateCharEncodingHandler (const char *name,
- int output,
+ xmlCharEncFlags flags,
xmlCharEncConvImpl impl,
- void *implCtxt,
+ void *implCtxt,
xmlCharEncodingHandlerPtr *out);
XMLPUBFUN xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler (xmlCharEncoding enc);
diff --git a/parserInternals.c b/parserInternals.c
index 7d9bcb2..0178353 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1142,7 +1142,7 @@
* To detect the EBCDIC code page, we convert the first 200 bytes
* to IBM037 (EBCDIC-US) and try to find the encoding declaration.
*/
- res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
+ res = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
ctxt->convImpl, ctxt->convCtxt, &handler);
if (res != 0)
return(res);
@@ -1188,7 +1188,7 @@
out[i] = 0;
xmlCharEncCloseFunc(handler);
res = xmlCreateCharEncodingHandler((char *) out + start,
- /* output */ 0, ctxt->convImpl, ctxt->convCtxt,
+ XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt,
&handler);
if (res != 0)
return(res);
@@ -1202,7 +1202,7 @@
* Encoding handlers are stateful, so we have to recreate them.
*/
xmlCharEncCloseFunc(handler);
- res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
+ res = xmlCreateCharEncodingHandler("IBM037", XML_ENC_INPUT,
ctxt->convImpl, ctxt->convCtxt, &handler);
if (res != 0)
return(res);
@@ -1265,7 +1265,7 @@
if (encoding == NULL)
return(-1);
- res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
+ res = xmlCreateCharEncodingHandler(encoding, XML_ENC_INPUT,
ctxt->convImpl, ctxt->convCtxt, &handler);
if (res == XML_ERR_UNSUPPORTED_ENCODING) {
xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
@@ -1618,7 +1618,7 @@
* declaration.
*/
res = xmlCreateCharEncodingHandler((const char *) encoding,
- /* output */ 0, ctxt->convImpl, ctxt->convCtxt, &handler);
+ XML_ENC_INPUT, ctxt->convImpl, ctxt->convCtxt, &handler);
if (res != XML_ERR_OK) {
xmlFatalErr(ctxt, res, (const char *) encoding);
xmlFree(encoding);
diff --git a/testparser.c b/testparser.c
index 6495c43..012c9fd 100644
--- a/testparser.c
+++ b/testparser.c
@@ -1064,20 +1064,20 @@
}
static int
-rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name, int output,
- xmlCharEncodingHandler **out) {
+rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name,
+ xmlCharEncFlags flags, xmlCharEncodingHandler **out) {
int *inputCtxt;
if (strcmp(name, "rot13") != 0)
- return xmlCreateCharEncodingHandler(name, output, NULL, NULL, out);
+ return xmlCreateCharEncodingHandler(name, flags, NULL, NULL, out);
- if (output)
+ if (flags & XML_ENC_OUTPUT)
return XML_ERR_UNSUPPORTED_ENCODING;
inputCtxt = xmlMalloc(sizeof(*inputCtxt));
*inputCtxt = 13;
- return xmlCharEncNewCustomHandler(name, rot13Convert, rot13Convert,
+ return xmlCharEncNewCustomHandler(name, rot13Convert, NULL,
rot13ConvCtxtDtor, inputCtxt, NULL,
out);
}
diff --git a/tools/gentest.py b/tools/gentest.py
index b518299..0d869fa 100755
--- a/tools/gentest.py
+++ b/tools/gentest.py
@@ -523,6 +523,8 @@
#
if (name == None) or ((name not in argtypes) and (name not in rettypes)):
continue;
+ if name == 'xmlCharEncFlags':
+ continue
define = 0
if (name in argtypes) and is_known_param_type(name) == 0: