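Add support for CDATA sections (<![CDATA[ ... ]]>)

The le2 state now sends '!' to a new lee2 state, which dispatches '-' to the
comment states and '[' to the new cd0..cd8 states. The comment states are
renumbered so that comment0 begins after '<!-' (previously after '<!'), which
removes comment5; lee1 is adjusted to match.

Note for review: in cd7 a ']' followed by an ordinary character reports only
the second character via setdata, and cd8 accepts nothing but '>'. CDATA
content containing a lone ']' or a ']]' not followed by '>' therefore does not
appear to be passed through intact.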
diff --git a/yxml-states b/yxml-states
index d60c059..6ea698f 100644
--- a/yxml-states
+++ b/yxml-states
@@ -60,10 +60,10 @@
le0 '!' lee1; '?' leq0; NameStart elemstart elem0
le1 '!' lee1; '?' pi0; NameStart elemstart elem0
-# TODO: CDSect
-le2 '!' comment0; '?' pi0; '/' etag0; NameStart elemstart elem0
+le2 '!' lee2; '?' pi0; '/' etag0; NameStart elemstart elem0
-lee1 '-' comment1; 'D' dt0
+lee1 '-' comment0; 'D' dt0
+lee2 '-' comment0; '[' cd0
leq0 'x' xmldecl0; NameStart pi1
@@ -127,13 +127,12 @@
std15 $quote xmldecl8
-# Comment, after '<!', returns to misc1 or misc2
+# Comment, after '<!-', returns to misc1 or misc2
comment0 '-' comment1
-comment1 '-' comment2
-comment2 CommentStart comment3
-comment3 '-' comment4; Char comment3
-comment4 '-' comment5; Char comment3
-comment5 '>' retmisc comment5
+comment1 CommentStart comment2
+comment2 '-' comment3; Char comment2
+comment3 '-' comment4; Char comment2
+comment4 '>' retmisc comment4
# PI, starting from '<?', returns to misc1 or misc2
@@ -145,6 +144,18 @@
pi3 '>' retmisc pi3; Char pi2
+# CDSect, starting from '<![', returns to misc2
+cd0 'C' cd1
+cd1 'D' cd2
+cd2 'A' cd3
+cd3 'T' cd4
+cd4 'A' cd5
+cd5 '[' cd6
+cd6 ']' cd7; Char setdata cd6
+cd7 ']' cd8; Char setdata cd6
+cd8 '>' misc2
+
+
# Doctype, starting from '<!D', returns to misc1
# TODO: This is a hack, all we do is read until we find a '>', not even
# validating that this tag actually starts with <!DOCTYPE ..>, much less
diff --git a/yxml.c b/yxml.c
index 184f3eb..5c2dc33 100644
--- a/yxml.c
+++ b/yxml.c
@@ -32,12 +32,20 @@
YXMLS_attr4,
YXMLS_bom1,
YXMLS_bom2,
+ YXMLS_cd0,
+ YXMLS_cd1,
+ YXMLS_cd2,
+ YXMLS_cd3,
+ YXMLS_cd4,
+ YXMLS_cd5,
+ YXMLS_cd6,
+ YXMLS_cd7,
+ YXMLS_cd8,
YXMLS_comment0,
YXMLS_comment1,
YXMLS_comment2,
YXMLS_comment3,
YXMLS_comment4,
- YXMLS_comment5,
YXMLS_dt0,
YXMLS_elem0,
YXMLS_elem1,
@@ -62,6 +70,7 @@
YXMLS_le1,
YXMLS_le2,
YXMLS_lee1,
+ YXMLS_lee2,
YXMLS_leq0,
YXMLS_misc0,
YXMLS_misc1,
@@ -376,6 +385,66 @@
return YXML_OK;
}
break;
+ case YXMLS_cd0:
+ if(ch == (unsigned char)'C') {
+ x->state = YXMLS_cd1;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd1:
+ if(ch == (unsigned char)'D') {
+ x->state = YXMLS_cd2;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd2:
+ if(ch == (unsigned char)'A') {
+ x->state = YXMLS_cd3;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd3:
+ if(ch == (unsigned char)'T') {
+ x->state = YXMLS_cd4;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd4:
+ if(ch == (unsigned char)'A') {
+ x->state = YXMLS_cd5;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd5:
+ if(ch == (unsigned char)'[') {
+ x->state = YXMLS_cd6;
+ return YXML_OK;
+ }
+ break;
+ case YXMLS_cd6:
+ if(ch == (unsigned char)']') {
+ x->state = YXMLS_cd7;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch))
+ return yxml_setdata(x, ch);
+ break;
+ case YXMLS_cd7:
+ if(ch == (unsigned char)']') {
+ x->state = YXMLS_cd8;
+ return YXML_OK;
+ }
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_cd6;
+ return yxml_setdata(x, ch);
+ }
+ break;
+ case YXMLS_cd8:
+ if(ch == (unsigned char)'>') {
+ x->state = YXMLS_misc2;
+ return YXML_OK;
+ }
+ break;
case YXMLS_comment0:
if(ch == (unsigned char)'-') {
x->state = YXMLS_comment1;
@@ -383,36 +452,30 @@
}
break;
case YXMLS_comment1:
- if(ch == (unsigned char)'-') {
+ if(yxml_isCommentStart(ch)) {
x->state = YXMLS_comment2;
return YXML_OK;
}
break;
case YXMLS_comment2:
- if(yxml_isCommentStart(ch)) {
+ if(ch == (unsigned char)'-') {
x->state = YXMLS_comment3;
return YXML_OK;
}
+ if(yxml_isChar(ch))
+ return YXML_OK;
break;
case YXMLS_comment3:
if(ch == (unsigned char)'-') {
x->state = YXMLS_comment4;
return YXML_OK;
}
- if(yxml_isChar(ch))
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_comment2;
return YXML_OK;
+ }
break;
case YXMLS_comment4:
- if(ch == (unsigned char)'-') {
- x->state = YXMLS_comment5;
- return YXML_OK;
- }
- if(yxml_isChar(ch)) {
- x->state = YXMLS_comment3;
- return YXML_OK;
- }
- break;
- case YXMLS_comment5:
if(ch == (unsigned char)'>')
return yxml_retmisc(x, ch);
break;
@@ -619,7 +682,7 @@
break;
case YXMLS_le2:
if(ch == (unsigned char)'!') {
- x->state = YXMLS_comment0;
+ x->state = YXMLS_lee2;
return YXML_OK;
}
if(ch == (unsigned char)'?') {
@@ -637,7 +700,7 @@
break;
case YXMLS_lee1:
if(ch == (unsigned char)'-') {
- x->state = YXMLS_comment1;
+ x->state = YXMLS_comment0;
return YXML_OK;
}
if(ch == (unsigned char)'D') {
@@ -645,6 +708,16 @@
return YXML_OK;
}
break;
+ case YXMLS_lee2:
+ if(ch == (unsigned char)'-') {
+ x->state = YXMLS_comment0;
+ return YXML_OK;
+ }
+ if(ch == (unsigned char)'[') {
+ x->state = YXMLS_cd0;
+ return YXML_OK;
+ }
+ break;
case YXMLS_leq0:
if(ch == (unsigned char)'x') {
x->state = YXMLS_xmldecl0;