html: Stop implying <p> start tags

Only <html>, <head> or <body> start tags should be implied. Opening an
extra <p> element around stray character data has always been a libxml2
quirk.
diff --git a/HTMLparser.c b/HTMLparser.c
index eac9964..1af775e 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -1165,19 +1165,6 @@
 };
 
 /*
- * The list of HTML elements which are supposed not to have
- * CDATA content and where a p element will be implied
- *
- * TODO: extend that list by reading the HTML SGML DTD on
- *       implied paragraph
- */
-static const char *const htmlNoContentElements[] = {
-    "html",
-    "head",
-    NULL
-};
-
-/*
  * The list of HTML attributes which are of content %Script;
  * NOTE: when adding ones, check htmlIsScriptAttribute() since
  *       it assumes the name starts with 'on'
@@ -1568,48 +1555,22 @@
 }
 
 /**
- * htmlCheckParagraph
+ * htmlStartCharData
  * @ctxt:  an HTML parser context
  *
- * Check whether a p element need to be implied before inserting
- * characters in the current element.
- *
- * Returns 1 if a paragraph has been inserted, 0 if not and -1
- *         in case of error.
+ * Prepare for non-whitespace character data.
  */
 
-static int
-htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
-    const xmlChar *tag;
-    int i;
-
-    if (ctxt == NULL)
-	return(-1);
-    if (ctxt->options & HTML_PARSE_HTML5)
-        return(0);
-
-    tag = ctxt->name;
-    if (tag == NULL) {
-	htmlAutoClose(ctxt, BAD_CAST"p");
-	htmlCheckImplied(ctxt, BAD_CAST"p");
-	htmlnamePush(ctxt, BAD_CAST"p");
-	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
-	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
-	return(1);
-    }
+static void
+htmlStartCharData(htmlParserCtxtPtr ctxt) {
+    if (ctxt->options & (HTML_PARSE_NOIMPLIED | HTML_PARSE_HTML5))
+        return;
     if (!htmlOmittedDefaultValue)
-	return(0);
-    for (i = 0; htmlNoContentElements[i] != NULL; i++) {
-	if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
-	    htmlAutoClose(ctxt, BAD_CAST"p");
-	    htmlCheckImplied(ctxt, BAD_CAST"p");
-	    htmlnamePush(ctxt, BAD_CAST"p");
-	    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
-		ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
-	    return(1);
-	}
-    }
-    return(0);
+	return;
+
+    if (xmlStrEqual(ctxt->name, BAD_CAST "head"))
+        htmlAutoClose(ctxt, BAD_CAST "p");
+    htmlCheckImplied(ctxt, BAD_CAST "p");
 }
 
 /**
@@ -2972,7 +2933,7 @@
 
             /*
              * Add leading whitespace to html or head elements before
-             * calling htmlCheckParagraph.
+             * calling htmlStartCharData.
              */
             for (i = 0; i < size; i++)
                 if (!IS_WS_HTML(buf[i]))
@@ -2994,7 +2955,7 @@
             if (size <= 0)
                 return;
 
-            htmlCheckParagraph(ctxt);
+            htmlStartCharData(ctxt);
         }
 
         if ((mode == 0) &&
@@ -4084,7 +4045,7 @@
     SKIP(2);
 
     if (ctxt->input->cur >= ctxt->input->end) {
-        htmlCheckParagraph(ctxt);
+        htmlStartCharData(ctxt);
         if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
             (ctxt->sax->characters != NULL))
             ctxt->sax->characters(ctxt->userData,
@@ -4243,7 +4204,7 @@
             } else if (IS_ASCII_LETTER(NXT(1))) {
                 htmlParseElementInternal(ctxt);
             } else {
-                htmlCheckParagraph(ctxt);
+                htmlStartCharData(ctxt);
                 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                     (ctxt->sax->characters != NULL))
                     ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
@@ -5187,7 +5148,7 @@
                         ctxt->checkIndex = 0;
                     } else {
                         ctxt->instate = XML_PARSER_CONTENT;
-                        htmlCheckParagraph(ctxt);
+                        htmlStartCharData(ctxt);
                         if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                             (ctxt->sax->characters != NULL))
                             ctxt->sax->characters(ctxt->userData,
diff --git a/result/HTML/758518-entity.html b/result/HTML/758518-entity.html
index 0f66150..7dde7c6 100644
--- a/result/HTML/758518-entity.html
+++ b/result/HTML/758518-entity.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><p>&amp;j&Ugrave;</p></body></html>
+<html><body>&amp;j&Ugrave;</body></html>
diff --git a/result/HTML/758518-entity.html.sax b/result/HTML/758518-entity.html.sax
index 25aa72a..4c512a0 100644
--- a/result/HTML/758518-entity.html.sax
+++ b/result/HTML/758518-entity.html.sax
@@ -2,10 +2,8 @@
 SAX.startDocument()
 SAX.startElement(html)
 SAX.startElement(body)
-SAX.startElement(p)
 SAX.characters(&amp;j, 2)
 SAX.characters(&Ugrave;, 2)
-SAX.endElement(p)
 SAX.endElement(body)
 SAX.endElement(html)
 SAX.endDocument()
diff --git a/result/HTML/758605.html b/result/HTML/758605.html
index ce5cbe5..77f70a0 100644
--- a/result/HTML/758605.html
+++ b/result/HTML/758605.html
@@ -1,3 +1,3 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body><p>&amp;:&ecirc;
-</p></body></html>
+<html><body>&amp;:&ecirc;
+</body></html>
diff --git a/result/HTML/758605.html.sax b/result/HTML/758605.html.sax
index c6dc85a..ba116b7 100644
--- a/result/HTML/758605.html.sax
+++ b/result/HTML/758605.html.sax
@@ -2,11 +2,9 @@
 SAX.startDocument()
 SAX.startElement(html)
 SAX.startElement(body)
-SAX.startElement(p)
 SAX.characters(&amp;:, 2)
 SAX.characters(&ecirc;
 , 3)
-SAX.endElement(p)
 SAX.endElement(body)
 SAX.endElement(html)
 SAX.endDocument()
diff --git a/result/HTML/implied1.html b/result/HTML/implied1.html
index 6c86065..a937900 100644
--- a/result/HTML/implied1.html
+++ b/result/HTML/implied1.html
@@ -1,6 +1,6 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>
 <head>   </head>
-<body><p>x
-</p></body>
+<body>x
+</body>
 </html>
diff --git a/result/HTML/implied1.html.sax b/result/HTML/implied1.html.sax
index b3d7165..531bdf6 100644
--- a/result/HTML/implied1.html.sax
+++ b/result/HTML/implied1.html.sax
@@ -5,10 +5,8 @@
 SAX.characters(   , 3)
 SAX.endElement(head)
 SAX.startElement(body)
-SAX.startElement(p)
 SAX.characters(x
 , 2)
-SAX.endElement(p)
 SAX.endElement(body)
 SAX.endElement(html)
 SAX.endDocument()
diff --git a/testparser.c b/testparser.c
index 1d0f9e7..4004ac8 100644
--- a/testparser.c
+++ b/testparser.c
@@ -474,7 +474,7 @@
         err = 1;
     }
 
-    node = xmlDocGetRootElement(doc)->children->children->children;
+    node = xmlDocGetRootElement(doc)->children->children;
     if (!xmlStrEqual(node->content, BAD_CAST "-\xC3\xA4-")) {
         fprintf(stderr, "testHtmlPushWithEncoding failed\n");
         err = 1;