Bug 758605: Heap-based buffer overread in xmlDictAddString <https://bugzilla.gnome.org/show_bug.cgi?id=758605> Reviewed by David Kilzer. * HTMLparser.c: (htmlParseName): Add bounds check. (htmlParseNameComplex): Ditto. * result/HTML/758605.html: Added. * result/HTML/758605.html.err: Added. * result/HTML/758605.html.sax: Added. * runtest.c: (pushParseTest): The input for the new test case was so small (4 bytes) that htmlParseChunk() was never called after htmlCreatePushParserCtxt(), thereby creating a false positive test failure. Fixed by using a do-while loop so we always call htmlParseChunk() at least once. * test/HTML/758605.html: Added.

commit: a820dbeac29d330bae4be05d9ecd939ad6b4aa33 [log] [tgz]
author: Pranjal Jumde <pjumde@apple.com> Tue Mar 01 11:34:04 2016 -0800
committer: Daniel Veillard <veillard@redhat.com> Mon May 23 15:01:07 2016 +0800
tree: 1f027e11ed873ef1b0535af7e98f8c5fa0e3d73a
parent: db07dd613e461df93dde7902c6505629bf0734e9 [diff]
diff --git a/HTMLparser.c b/HTMLparser.c
index 69eed2b..1c112cc 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c

@@ -2471,6 +2471,10 @@
 	       (*in == '_') || (*in == '-') ||
 	       (*in == ':') || (*in == '.'))
 	    in++;
+
+	if (in == ctxt->input->end)
+	    return(NULL);
+
 	if ((*in > 0) && (*in < 0x80)) {
 	    count = in - ctxt->input->cur;
 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
@@ -2514,6 +2518,10 @@
 	NEXTL(l);
 	c = CUR_CHAR(l);
     }
+
+    if (ctxt->input->base > ctxt->input->cur - len)
+	return(NULL);
+
     return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
 }
 

diff --git a/result/HTML/758605.html b/result/HTML/758605.html
new file mode 100644
index 0000000..a085cce
--- /dev/null
+++ b/result/HTML/758605.html

@@ -0,0 +1,3 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html><body><p>&amp;
+</p></body></html>

diff --git a/result/HTML/758605.html.err b/result/HTML/758605.html.err
new file mode 100644
index 0000000..2b82be6
--- /dev/null
+++ b/result/HTML/758605.html.err

@@ -0,0 +1,3 @@
+./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: no name
+ê
+  ^

diff --git a/result/HTML/758605.html.sax b/result/HTML/758605.html.sax
new file mode 100644
index 0000000..1f5cd32
--- /dev/null
+++ b/result/HTML/758605.html.sax

@@ -0,0 +1,13 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.error: htmlParseEntityRef: no name
+SAX.startElement(html)
+SAX.startElement(body)
+SAX.startElement(p)
+SAX.characters(&amp;, 1)
+SAX.ignorableWhitespace(
+, 1)
+SAX.endElement(p)
+SAX.endElement(body)
+SAX.endElement(html)
+SAX.endDocument()

diff --git a/runtest.c b/runtest.c
index 36fbe5a..bb74d2a 100644
--- a/runtest.c
+++ b/runtest.c

@@ -1873,7 +1873,7 @@
     ctxt = xmlCreatePushParserCtxt(NULL, NULL, base + cur, 4, filename);
     xmlCtxtUseOptions(ctxt, options);
     cur += 4;
-    while (cur < size) {
+    do {
         if (cur + 1024 >= size) {
 #ifdef LIBXML_HTML_ENABLED
 	    if (options & XML_PARSE_HTML)
@@ -1891,7 +1891,7 @@
 	    xmlParseChunk(ctxt, base + cur, 1024, 0);
 	    cur += 1024;
 	}
-    }
+    } while (cur < size);
     doc = ctxt->myDoc;
 #ifdef LIBXML_HTML_ENABLED
     if (options & XML_PARSE_HTML)

diff --git a/test/HTML/758605.html b/test/HTML/758605.html
new file mode 100644
index 0000000..9b1b3c2
--- /dev/null
+++ b/test/HTML/758605.html

@@ -0,0 +1 @@
+&:ê
commit	a820dbeac29d330bae4be05d9ecd939ad6b4aa33	[log] [tgz]
author	Pranjal Jumde <pjumde@apple.com>	Tue Mar 01 11:34:04 2016 -0800
committer	Daniel Veillard <veillard@redhat.com>	Mon May 23 15:01:07 2016 +0800
tree	1f027e11ed873ef1b0535af7e98f8c5fa0e3d73a
parent	db07dd613e461df93dde7902c6505629bf0734e9 [diff]