[CVE-2025-32414] python: Read at most len/4 characters.
Fixes #889 by reserving space in the buffer for UTF-8 encoding of text.
diff --git a/python/libxml.c b/python/libxml.c
index 1fe8d68..2bf1407 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -248,7 +248,9 @@
file = (PyObject *) context;
if (file == NULL) return(-1);
- ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len);
+ /* When read() returns a string, the length is in characters not bytes, so
+ request at most len / 4 characters to leave space for UTF-8 encoding. */
+ ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len / 4);
if (ret == NULL) {
printf("xmlPythonFileReadRaw: result is NULL\n");
return(-1);
@@ -283,10 +285,12 @@
Py_DECREF(ret);
return(-1);
}
- if (lenread > len)
- memcpy(buffer, data, len);
- else
- memcpy(buffer, data, lenread);
+ if (lenread < 0 || lenread > len) {
+ printf("xmlPythonFileReadRaw: invalid lenread\n");
+ Py_DECREF(ret);
+ return(-1);
+ }
+ memcpy(buffer, data, lenread);
Py_DECREF(ret);
return(lenread);
}
@@ -310,7 +314,9 @@
file = (PyObject *) context;
if (file == NULL) return(-1);
- ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len);
+ /* When io_read() returns a string, the length is in characters not bytes, so
+ request at most len / 4 characters to leave space for UTF-8 encoding. */
+ ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len / 4);
if (ret == NULL) {
printf("xmlPythonFileRead: result is NULL\n");
return(-1);
@@ -345,10 +351,12 @@
Py_DECREF(ret);
return(-1);
}
- if (lenread > len)
- memcpy(buffer, data, len);
- else
- memcpy(buffer, data, lenread);
+ if (lenread < 0 || lenread > len) {
+ printf("xmlPythonFileRead: invalid lenread\n");
+ Py_DECREF(ret);
+ return(-1);
+ }
+ memcpy(buffer, data, lenread);
Py_DECREF(ret);
return(lenread);
}