Improve parsing of doctype declarations

This code should handle all declarations that don't use a conditional
section anywhere.
diff --git a/yxml-states b/yxml-states
index 7e599d5..0134a51 100644
--- a/yxml-states
+++ b/yxml-states
@@ -134,12 +134,21 @@
 
 
 # Doctype, starting from '<!DOCTYPE', returns to misc1
-# TODO: This is a hack, all we do is read until we find a '>', not
-#   validating its content. This hack fails if the DTD contains a '>'
-#   character, which is very possible. Unfortunately, just figuring out where a
-#   DTD ends already requires a rather elaborate parser. :-(
-# <DOCTYPE " " ' ' [ " " ' ' <? ?> <!-- --> < " " ' ' > ]>
-dt0        '>' misc1; Char dt0
+# XXX: The state machine below only attempts to figure out where the doctype
+#   declaration ends; its contents are not actually parsed or validated.
+#   Basically, it allows the following nesting of tags/quotes/PIs/comments:
+#
+#     <!DOCTYPE ".." '..' <?PI ..?> <!--..--> <!.. ".." '..'> >
+#
+#   Only the last '>' is correctly recognized as the end of the declaration.
+#   Any other '>' found to end a tag/PI/comment, or found within quotes,
+#   comments or a PI, is ignored.
+# TODO: This still fails on conditional sections, which may nest.
+dt0        '>' misc1; '\''|'"' $quote @dt0 dt1; '<' dt2; Char dt0
+dt1        $quote @; Char dt1
+dt2        '?' @dt0 pi0; '!' dt3
+dt3        '-' @dt0 comment1; Char dt4
+dt4        '\''|'"' $quote @dt4 dt1; '>' dt0; Char dt4
 
 
 # End tag, after '</', returns to misc2
diff --git a/yxml.c b/yxml.c
index 8322b74..cbdada9 100644
--- a/yxml.c
+++ b/yxml.c
@@ -42,6 +42,10 @@
 	YXMLS_comment4,
 	YXMLS_comment5,
 	YXMLS_dt0,
+	YXMLS_dt1,
+	YXMLS_dt2,
+	YXMLS_dt3,
+	YXMLS_dt4,
 	YXMLS_elem0,
 	YXMLS_elem1,
 	YXMLS_elem2,
@@ -441,6 +445,60 @@
 			x->state = YXMLS_misc1;
 			return YXML_OK;
 		}
+		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+			x->state = YXMLS_dt1;
+			x->quote = ch;
+			x->nextstate = YXMLS_dt0;
+			return YXML_OK;
+		}
+		if(ch == (unsigned char)'<') {
+			x->state = YXMLS_dt2;
+			return YXML_OK;
+		}
+		if(yxml_isChar(ch))
+			return YXML_OK;
+		break;
+	case YXMLS_dt1:
+		if(x->quote == ch) {
+			x->state = x->nextstate;
+			return YXML_OK;
+		}
+		if(yxml_isChar(ch))
+			return YXML_OK;
+		break;
+	case YXMLS_dt2:
+		if(ch == (unsigned char)'?') {
+			x->state = YXMLS_pi0;
+			x->nextstate = YXMLS_dt0;
+			return YXML_OK;
+		}
+		if(ch == (unsigned char)'!') {
+			x->state = YXMLS_dt3;
+			return YXML_OK;
+		}
+		break;
+	case YXMLS_dt3:
+		if(ch == (unsigned char)'-') {
+			x->state = YXMLS_comment1;
+			x->nextstate = YXMLS_dt0;
+			return YXML_OK;
+		}
+		if(yxml_isChar(ch)) {
+			x->state = YXMLS_dt4;
+			return YXML_OK;
+		}
+		break;
+	case YXMLS_dt4:
+		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
+			x->state = YXMLS_dt1;
+			x->quote = ch;
+			x->nextstate = YXMLS_dt4;
+			return YXML_OK;
+		}
+		if(ch == (unsigned char)'>') {
+			x->state = YXMLS_dt0;
+			return YXML_OK;
+		}
 		if(yxml_isChar(ch))
 			return YXML_OK;
 		break;