Check for invalid redeclarations of predefined entities

Implement section "4.6 Predefined Entities" of the XML 1.0 spec and
check whether redeclarations of predefined entities match the original
definitions.

Note that some test cases declared

    <!ENTITY lt "<">

But the XML spec clearly states that this is illegal:

> If the entities lt or amp are declared, they MUST be declared as
> internal entities whose replacement text is a character reference to
> the respective character (less-than sign or ampersand) being escaped;
> the double escaping is REQUIRED for these entities so that references
> to them produce a well-formed result.

Also fixes #217 but the connection is only tangential. The integer
overflow discovered by fuzzing was more related to the fact that various
parts of the parser disagreed on whether to prefer predefined entities
over their redeclarations. The whole situation is a mess and even
depends on legacy parser options. But now that redeclarations are
validated, it shouldn't make a difference.

As noted in the added comment, this is also one of the cases where
overly defensive checks can hide interesting logic bugs from fuzzers.
diff --git a/entities.c b/entities.c
index 08ef142..caad0b5 100644
--- a/entities.c
+++ b/entities.c
@@ -210,7 +210,7 @@
 	  const xmlChar *content) {
     xmlDictPtr dict = NULL;
     xmlEntitiesTablePtr table = NULL;
-    xmlEntityPtr ret;
+    xmlEntityPtr ret, predef;
 
     if (name == NULL)
 	return(NULL);
@@ -223,6 +223,43 @@
         case XML_INTERNAL_GENERAL_ENTITY:
         case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
         case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+            predef = xmlGetPredefinedEntity(name);
+            if (predef != NULL) {
+                int valid = 0;
+
+                /* 4.6 Predefined Entities */
+                if (type == XML_INTERNAL_GENERAL_ENTITY) {
+                    int c = predef->content[0];
+
+                    if (((content[0] == c) && (content[1] == 0)) &&
+                        ((c == '>') || (c == '\'') || (c == '"'))) {
+                        valid = 1;
+                    } else if ((content[0] == '&') && (content[1] == '#')) {
+                        if (content[2] == 'x') {
+                            xmlChar *hex = BAD_CAST "0123456789ABCDEF";
+                            xmlChar ref[] = "00;";
+
+                            ref[0] = hex[c / 16 % 16];
+                            ref[1] = hex[c % 16];
+                            if (xmlStrcasecmp(&content[3], ref) == 0)
+                                valid = 1;
+                        } else {
+                            xmlChar ref[] = "00;";
+
+                            ref[0] = '0' + c / 10 % 10;
+                            ref[1] = '0' + c % 10;
+                            if (xmlStrEqual(&content[2], ref))
+                                valid = 1;
+                        }
+                    }
+                }
+                if (!valid) {
+                    xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING,
+                            "xmlAddEntity: invalid redeclaration of predefined"
+                            " entity");
+                    return(NULL);
+                }
+            }
 	    if (dtd->entities == NULL)
 		dtd->entities = xmlHashCreateDict(0, dict);
 	    table = dtd->entities;
diff --git a/result/ent6hex b/result/ent6hex
new file mode 100644
index 0000000..6d59df9
--- /dev/null
+++ b/result/ent6hex
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ENTITY lt "&#x26;#x3c;">
+<!ENTITY gt "&#x26;#x3E;">
+<!ENTITY amp "&#x26;#x26;">
+<!ENTITY apos "&#x26;#x27;">
+<!ENTITY quot "&#x26;#x22;">
+]>
+<doc/>
diff --git a/result/ent6hex.rde b/result/ent6hex.rde
new file mode 100644
index 0000000..9b0a34d
--- /dev/null
+++ b/result/ent6hex.rde
@@ -0,0 +1,2 @@
+0 10 doc 0 0
+0 1 doc 1 0
diff --git a/result/ent6hex.rdr b/result/ent6hex.rdr
new file mode 100644
index 0000000..9b0a34d
--- /dev/null
+++ b/result/ent6hex.rdr
@@ -0,0 +1,2 @@
+0 10 doc 0 0
+0 1 doc 1 0
diff --git a/result/ent6hex.sax b/result/ent6hex.sax
new file mode 100644
index 0000000..df844cd
--- /dev/null
+++ b/result/ent6hex.sax
@@ -0,0 +1,17 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(doc, , )
+SAX.entityDecl(lt, 1, (null), (null), &#x3c;)
+SAX.getEntity(lt)
+SAX.entityDecl(gt, 1, (null), (null), &#x3E;)
+SAX.getEntity(gt)
+SAX.entityDecl(amp, 1, (null), (null), &#x26;)
+SAX.getEntity(amp)
+SAX.entityDecl(apos, 1, (null), (null), &#x27;)
+SAX.getEntity(apos)
+SAX.entityDecl(quot, 1, (null), (null), &#x22;)
+SAX.getEntity(quot)
+SAX.externalSubset(doc, , )
+SAX.startElement(doc)
+SAX.endElement(doc)
+SAX.endDocument()
diff --git a/result/ent6hex.sax2 b/result/ent6hex.sax2
new file mode 100644
index 0000000..6c9df2e
--- /dev/null
+++ b/result/ent6hex.sax2
@@ -0,0 +1,17 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(doc, , )
+SAX.entityDecl(lt, 1, (null), (null), &#x3c;)
+SAX.getEntity(lt)
+SAX.entityDecl(gt, 1, (null), (null), &#x3E;)
+SAX.getEntity(gt)
+SAX.entityDecl(amp, 1, (null), (null), &#x26;)
+SAX.getEntity(amp)
+SAX.entityDecl(apos, 1, (null), (null), &#x27;)
+SAX.getEntity(apos)
+SAX.entityDecl(quot, 1, (null), (null), &#x22;)
+SAX.getEntity(quot)
+SAX.externalSubset(doc, , )
+SAX.startElementNs(doc, NULL, NULL, 0, 0, 0)
+SAX.endElementNs(doc, NULL, NULL)
+SAX.endDocument()
diff --git a/result/errors/759398.xml.str b/result/errors/759398.xml.str
index de9a28c..6809c06 100644
--- a/result/errors/759398.xml.str
+++ b/result/errors/759398.xml.str
@@ -1,5 +1,5 @@
 ./test/errors/759398.xml:210: parser error : internal error: detected an error in element content
 
-need to worry about parsers whi<! don't expand 
+need to worry about parsers whi<! don't expand PErefs finding
                                ^
 ./test/errors/759398.xml : failed to parse
diff --git a/result/noent/ent6hex b/result/noent/ent6hex
new file mode 100644
index 0000000..6d59df9
--- /dev/null
+++ b/result/noent/ent6hex
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ENTITY lt "&#x26;#x3c;">
+<!ENTITY gt "&#x26;#x3E;">
+<!ENTITY amp "&#x26;#x26;">
+<!ENTITY apos "&#x26;#x27;">
+<!ENTITY quot "&#x26;#x22;">
+]>
+<doc/>
diff --git a/result/noent/ent6hex.sax2 b/result/noent/ent6hex.sax2
new file mode 100644
index 0000000..6c9df2e
--- /dev/null
+++ b/result/noent/ent6hex.sax2
@@ -0,0 +1,17 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(doc, , )
+SAX.entityDecl(lt, 1, (null), (null), &#x3c;)
+SAX.getEntity(lt)
+SAX.entityDecl(gt, 1, (null), (null), &#x3E;)
+SAX.getEntity(gt)
+SAX.entityDecl(amp, 1, (null), (null), &#x26;)
+SAX.getEntity(amp)
+SAX.entityDecl(apos, 1, (null), (null), &#x27;)
+SAX.getEntity(apos)
+SAX.entityDecl(quot, 1, (null), (null), &#x22;)
+SAX.getEntity(quot)
+SAX.externalSubset(doc, , )
+SAX.startElementNs(doc, NULL, NULL, 0, 0, 0)
+SAX.endElementNs(doc, NULL, NULL)
+SAX.endDocument()
diff --git a/result/valid/REC-xml-19980210.xml b/result/valid/REC-xml-19980210.xml
index 4460b3e..9fb9103 100644
--- a/result/valid/REC-xml-19980210.xml
+++ b/result/valid/REC-xml-19980210.xml
@@ -10,7 +10,7 @@
 <!ENTITY draft.month "February">
 <!ENTITY draft.year "1998">
 <!ENTITY WebSGML "WebSGML Adaptations Annex to ISO 8879">
-<!ENTITY lt "<">
+<!ENTITY lt "&#38;#60;">
 <!ENTITY gt ">">
 <!ENTITY xmlpio "'&lt;?xml'">
 <!ENTITY pic "'?>'">
diff --git a/test/ent6hex b/test/ent6hex
new file mode 100644
index 0000000..b86028b
--- /dev/null
+++ b/test/ent6hex
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ENTITY lt     "&#x26;#x3c;"> 
+<!ENTITY gt     "&#x26;#x3E;"> 
+<!ENTITY amp    "&#x26;#x26;"> 
+<!ENTITY apos   "&#x26;#x27;"> 
+<!ENTITY quot   "&#x26;#x22;"> 
+]>
+<doc/>
diff --git a/test/errors/759398.xml b/test/errors/759398.xml
index 132e029..1fcaa3f 100644
--- a/test/errors/759398.xml
+++ b/test/errors/759398.xml
@@ -18,7 +18,7 @@
 <!ENTITY WebSGML 
  'WebSGML Adaptations Annex to ISO 8879'>
 
-<!ENTITY lt     "<"> 
+<!ENTITY lt     "&#38;#60;"> 
 <!ENTITY gt     ">"> 
 <!ENTITY xmlpio "'&lt;?xml'">
 <!ENTITY pic    "'?>'">
diff --git a/test/relaxng/tutor11_1_3.xml b/test/relaxng/tutor11_1_3.xml
index d146032..495550b 100644
--- a/test/relaxng/tutor11_1_3.xml
+++ b/test/relaxng/tutor11_1_3.xml
@@ -18,7 +18,7 @@
 <!ENTITY WebSGML 
  'WebSGML Adaptations Annex to ISO 8879'>
 
-<!ENTITY lt     "<"> 
+<!ENTITY lt     "&#38;#60;"> 
 <!ENTITY gt     ">"> 
 <!ENTITY xmlpio "'&lt;?xml'">
 <!ENTITY pic    "'?>'">
diff --git a/test/valid/REC-xml-19980210.xml b/test/valid/REC-xml-19980210.xml
index bec8e90..6591b69 100644
--- a/test/valid/REC-xml-19980210.xml
+++ b/test/valid/REC-xml-19980210.xml
@@ -18,7 +18,7 @@
 <!ENTITY WebSGML 
  'WebSGML Adaptations Annex to ISO 8879'>
 
-<!ENTITY lt     "<"> 
+<!ENTITY lt     "&#38;#60;"> 
 <!ENTITY gt     ">"> 
 <!ENTITY xmlpio "'&lt;?xml'">
 <!ENTITY pic    "'?>'">
diff --git a/tree.c b/tree.c
index d2347df..d6ea704 100644
--- a/tree.c
+++ b/tree.c
@@ -1310,6 +1310,16 @@
 		else
 		    tmp = 0;
 		while (tmp != ';') { /* Non input consuming loop */
+                    /*
+                     * If you find an integer overflow here when fuzzing,
+                     * the bug is probably elsewhere. This function should
+                     * only receive entities that were already validated by
+                     * the parser, typically by xmlParseAttValueComplex
+                     * calling xmlStringDecodeEntities.
+                     *
+                     * So it's better *not* to check for overflow to
+                     * potentially discover new bugs.
+                     */
 		    if ((tmp >= '0') && (tmp <= '9'))
 			charval = charval * 16 + (tmp - '0');
 		    else if ((tmp >= 'a') && (tmp <= 'f'))
@@ -1338,6 +1348,7 @@
 		else
 		    tmp = 0;
 		while (tmp != ';') { /* Non input consuming loops */
+                    /* Don't check for integer overflow, see above. */
 		    if ((tmp >= '0') && (tmp <= '9'))
 			charval = charval * 10 + (tmp - '0');
 		    else {
@@ -1517,6 +1528,7 @@
 		cur += 3;
 		tmp = *cur;
 		while (tmp != ';') { /* Non input consuming loop */
+                    /* Don't check for integer overflow, see above. */
 		    if ((tmp >= '0') && (tmp <= '9'))
 			charval = charval * 16 + (tmp - '0');
 		    else if ((tmp >= 'a') && (tmp <= 'f'))
@@ -1539,6 +1551,7 @@
 		cur += 2;
 		tmp = *cur;
 		while (tmp != ';') { /* Non input consuming loops */
+                    /* Don't check for integer overflow, see above. */
 		    if ((tmp >= '0') && (tmp <= '9'))
 			charval = charval * 10 + (tmp - '0');
 		    else {