Fix returning of ']' chars within CDATA + de-generalize ?-in-PI handling

I thought I'd handle the ?-in-PI and ]-in-CDATA problems with a more
general solution, but realized that it wouldn't be any simpler or smaller
than these specific solutions.
diff --git a/test/content01.out b/test/content01.out
index 333e1e8..572a916 100644
--- a/test/content01.out
+++ b/test/content01.out
@@ -11,6 +11,6 @@
 content
 data  ! !
 elemend
-data \x0aCDATA!\x0a[[CD<a/> <!-- no comment -->&amp;<?NotaPI?>&notaref;\x0aBug: The following character *should* be parsed as a DATA token, but currently isn't: \x0aThis does work, however: ]]\x0a
+data \x0aCDATA!\x0a[[CD<a/> <!-- no comment -->&amp;<?NotaPI?>&notaref;\x0a]x]]y]]]z]]\x0a
 elemend
 ok
diff --git a/test/content01.xml b/test/content01.xml
index f9c74a8..4689c13 100644
--- a/test/content01.xml
+++ b/test/content01.xml
@@ -4,6 +4,5 @@
 <refs>&#x20;&#33;&#x0020;&#0033;</refs>
 <![CDATA[CDATA!]]>
 <![CDATA[[[CD<a/> <!-- no comment -->&amp;<?NotaPI?>&notaref;
-Bug: The following character *should* be parsed as a DATA token, but currently isn't: ]
-This does work, however: ]]]]>
+]x]]y]]]z]]]]>
 </a>
diff --git a/yxml-gen.pl b/yxml-gen.pl
index affaa84..dde243a 100755
--- a/yxml-gen.pl
+++ b/yxml-gen.pl
@@ -49,7 +49,7 @@
   # return an error code. Functions that may return an error should NOT be
   # called in the same state as other functions.
   for(@_) {
-    push @r, "yxml_$1(x, ch)" if /^([a-z_]+)$/;
+    push @r, "yxml_$1(x, ch)" if /^([a-z0-9_]+)$/;
     push @c, "x->$1 = ch" if /^\$(.+)$/;
     if(/^"/) {
       push @c, (
diff --git a/yxml-states b/yxml-states
index c4fb8fe..acf4ec5 100644
--- a/yxml-states
+++ b/yxml-states
@@ -120,18 +120,15 @@
 # PI, starting from '<?', returns to @
 pi0        NameStart pistart pi1
 pi1        Name piname pi1; '?' pinameend pi4; SP pinameend pi2
-pi2        '?' datahold pi3; Char dataset pi2
-pi3        '>' pivalend @; Char datarelease pi2
+pi2        '?' pi3; Char dataset pi2
+pi3        '>' pivalend @; Char datapi pi2
 pi4        '>' pivalend @
 
 
 # CDSect, starting from '<![DATA[', returns to misc2
-# TODO: "<![CDATA[ ] ]]>" is parsed correctly, but the ']' is not sent to
-#   the application as a DATA token. No idea how to easily fix that. Currently,
-#   "<![CDATA[ ]]]]>" does work correctly.
 cd0        ']' cd1; Char dataset cd0
-cd1        ']' cd2; Char dataset cd0
-cd2        ']' dataset cd2; '>' misc2; Char dataset cd0
+cd1        ']' cd2; Char datacd1 cd0
+cd2        ']' dataset cd2; '>' misc2; Char datacd2 cd0
 
 
 # Doctype, starting from '<!DOCTYPE', returns to misc1
@@ -173,6 +170,6 @@
 attr0      Name attrname attr0; SP attrnameend attr1; '=' attrnameend attr2
 attr1      SP attr1; '=' attr2
 attr2      SP attr2; '\''|'"' $quote attr3
-attr3      AttValue attrvalset attr3; '&' refstart attr4; $quote attrvalend elem2
+attr3      AttValue dataattr attr3; '&' refstart attr4; $quote attrvalend elem2
 attr4      Ref ref attr4; '\x3b' refend attr3
 
diff --git a/yxml.c b/yxml.c
index 5bafe38..c451816 100644
--- a/yxml.c
+++ b/yxml.c
@@ -128,24 +128,32 @@
 }
 
 
-static inline int yxml_datahold(yxml_t *x, unsigned ch) {
-	yxml_setchar(x->data, ch);
-	x->data[1] = 0;
-	return YXML_OK;
-}
-
-
-static inline int yxml_datarelease(yxml_t *x, unsigned ch) {
-	char *r = x->data;
-	while(*r)
-		r++;
-	yxml_setchar(r, ch);
-	r[1] = 0;
+static inline int yxml_datapi(yxml_t *x, unsigned ch) {
+	x->data[0] = '?';
+	yxml_setchar(x->data+1, ch);
+	x->data[2] = 0;
 	return YXML_DATA;
 }
 
 
-static inline int yxml_attrvalset(yxml_t *x, unsigned ch) {
+static inline int yxml_datacd1(yxml_t *x, unsigned ch) {
+	x->data[0] = ']';
+	yxml_setchar(x->data+1, ch);
+	x->data[2] = 0;
+	return YXML_DATA;
+}
+
+
+static inline int yxml_datacd2(yxml_t *x, unsigned ch) {
+	x->data[0] = ']';
+	x->data[1] = ']';
+	yxml_setchar(x->data+2, ch);
+	x->data[3] = 0;
+	return YXML_DATA;
+}
+
+
+static inline int yxml_dataattr(yxml_t *x, unsigned ch) {
 	/* Normalize attribute values according to the XML spec section 3.3.3. */
 	return yxml_dataset(x, ch == 0x9 || ch == 0xa ? 0x20 : ch);
 }
@@ -357,7 +365,7 @@
 		break;
 	case YXMLS_attr3:
 		if(yxml_isAttValue(ch))
-			return yxml_attrvalset(x, ch);
+			return yxml_dataattr(x, ch);
 		if(ch == (unsigned char)'&') {
 			x->state = YXMLS_attr4;
 			return yxml_refstart(x, ch);
@@ -390,7 +398,7 @@
 		}
 		if(yxml_isChar(ch)) {
 			x->state = YXMLS_cd0;
-			return yxml_dataset(x, ch);
+			return yxml_datacd1(x, ch);
 		}
 		break;
 	case YXMLS_cd2:
@@ -402,7 +410,7 @@
 		}
 		if(yxml_isChar(ch)) {
 			x->state = YXMLS_cd0;
-			return yxml_dataset(x, ch);
+			return yxml_datacd2(x, ch);
 		}
 		break;
 	case YXMLS_comment0:
@@ -792,7 +800,7 @@
 	case YXMLS_pi2:
 		if(ch == (unsigned char)'?') {
 			x->state = YXMLS_pi3;
-			return yxml_datahold(x, ch);
+			return YXML_OK;
 		}
 		if(yxml_isChar(ch))
 			return yxml_dataset(x, ch);
@@ -804,7 +812,7 @@
 		}
 		if(yxml_isChar(ch)) {
 			x->state = YXMLS_pi2;
-			return yxml_datarelease(x, ch);
+			return yxml_datapi(x, ch);
 		}
 		break;
 	case YXMLS_pi4:
diff --git a/yxml.c.in b/yxml.c.in
index cc68f50..fe30728 100644
--- a/yxml.c.in
+++ b/yxml.c.in
@@ -64,24 +64,32 @@
 }
 
 
-static inline int yxml_datahold(yxml_t *x, unsigned ch) {
-	yxml_setchar(x->data, ch);
-	x->data[1] = 0;
-	return YXML_OK;
-}
-
-
-static inline int yxml_datarelease(yxml_t *x, unsigned ch) {
-	char *r = x->data;
-	while(*r)
-		r++;
-	yxml_setchar(r, ch);
-	r[1] = 0;
+static inline int yxml_datapi(yxml_t *x, unsigned ch) {
+	x->data[0] = '?';
+	yxml_setchar(x->data+1, ch);
+	x->data[2] = 0;
 	return YXML_DATA;
 }
 
 
-static inline int yxml_attrvalset(yxml_t *x, unsigned ch) {
+static inline int yxml_datacd1(yxml_t *x, unsigned ch) {
+	x->data[0] = ']';
+	yxml_setchar(x->data+1, ch);
+	x->data[2] = 0;
+	return YXML_DATA;
+}
+
+
+static inline int yxml_datacd2(yxml_t *x, unsigned ch) {
+	x->data[0] = ']';
+	x->data[1] = ']';
+	yxml_setchar(x->data+2, ch);
+	x->data[3] = 0;
+	return YXML_DATA;
+}
+
+
+static inline int yxml_dataattr(yxml_t *x, unsigned ch) {
 	/* Normalize attribute values according to the XML spec section 3.3.3. */
 	return yxml_dataset(x, ch == 0x9 || ch == 0xa ? 0x20 : ch);
 }
diff --git a/yxml.h b/yxml.h
index 8b87d0d..ec22db5 100644
--- a/yxml.h
+++ b/yxml.h
@@ -77,9 +77,10 @@
 	/* The last read character(s) of an attribute value, element data, or
 	 * processing instruction. Changed after YXML_DATA and only valid until the
 	 * next yxml_parse() call. Usually, this string only consists of a single
-	 * character, but multiple characters may be returned in the following case:
-	 * - "<?SomePI ?x ?>": The two characters "?x" are returned in a single
-	 *   data token.
+	 * character, but multiple characters are returned in the following cases:
+	 * - "<?SomePI ?x ?>": The two characters "?x"
+	 * - "<![CDATA[ ]x ]]>": The two characters "]x"
+	 * - "<![CDATA[ ]]x ]]>": The three characters "]]x"
 	 */
 	char data[8];