Fix comment parsing to accept empty comments and comments starting with '-' (e.g. <!---->, <!--- -->) + add some tests for comments
diff --git a/test/comment01.out b/test/comment01.out
new file mode 100644
index 0000000..43d87c6
--- /dev/null
+++ b/test/comment01.out
@@ -0,0 +1,5 @@
+
+elemstart a
+content
+elemend
+ok
diff --git a/test/comment01.xml b/test/comment01.xml
new file mode 100644
index 0000000..2c0c511
--- /dev/null
+++ b/test/comment01.xml
@@ -0,0 +1 @@
+<!-- ab-c --><a><!----><!--- --><!--xyz--></a><!-- -->
diff --git a/test/comment_err01.out b/test/comment_err01.out
new file mode 100644
index 0000000..9456d41
--- /dev/null
+++ b/test/comment_err01.out
@@ -0,0 +1,2 @@
+
+error
diff --git a/test/comment_err01.xml b/test/comment_err01.xml
new file mode 100644
index 0000000..9fd4729
--- /dev/null
+++ b/test/comment_err01.xml
@@ -0,0 +1 @@
+<!-- i-- --><a/>
diff --git a/test/comment_err02.out b/test/comment_err02.out
new file mode 100644
index 0000000..9456d41
--- /dev/null
+++ b/test/comment_err02.out
@@ -0,0 +1,2 @@
+
+error
diff --git a/test/comment_err02.xml b/test/comment_err02.xml
new file mode 100644
index 0000000..e21fa1a
--- /dev/null
+++ b/test/comment_err02.xml
@@ -0,0 +1 @@
+<!-- ---><a/>
diff --git a/test/comment_err03.out b/test/comment_err03.out
new file mode 100644
index 0000000..9456d41
--- /dev/null
+++ b/test/comment_err03.out
@@ -0,0 +1,2 @@
+
+error
diff --git a/test/comment_err03.xml b/test/comment_err03.xml
new file mode 100644
index 0000000..33ddc57
--- /dev/null
+++ b/test/comment_err03.xml
@@ -0,0 +1 @@
+<!- abc --><a/>
diff --git a/yxml-states b/yxml-states
index efdc17b..861f8e3 100644
--- a/yxml-states
+++ b/yxml-states
@@ -112,10 +112,9 @@
# Comment, after '<!', returns to @
comment0 '-' comment1
comment1 '-' comment2
-comment2 CommentStart comment3
-comment3 '-' comment4; Char comment3
-comment4 '-' comment5; Char comment3
-comment5 '>' @
+comment2 '-' comment3; Char comment2
+comment3 '-' comment4; Char comment2
+comment4 '>' @
# PI, starting from '<?', returns to @
diff --git a/yxml.c b/yxml.c
index 05bd6de..44de31f 100644
--- a/yxml.c
+++ b/yxml.c
@@ -40,7 +40,6 @@
YXMLS_comment2,
YXMLS_comment3,
YXMLS_comment4,
- YXMLS_comment5,
YXMLS_dt0,
YXMLS_dt1,
YXMLS_dt2,
@@ -101,7 +100,6 @@
#define yxml_isNum(c) (c-'0' < 10)
#define yxml_isHex(c) (yxml_isNum(c) || (c|32)-'a' < 6)
#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || c == '.' || c == '_' || c == '-')
-#define yxml_isCommentStart(c) (yxml_isChar(c) && c != '-')
#define yxml_isNameStart(c) (yxml_isAlpha(c) || c == ':' || c == '_')
#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || c == '-' || c == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
@@ -391,30 +389,24 @@
}
break;
case YXMLS_comment2:
- if(yxml_isCommentStart(ch)) {
+ if(ch == (unsigned char)'-') {
x->state = YXMLS_comment3;
return YXML_OK;
}
+ if(yxml_isChar(ch))
+ return YXML_OK;
break;
case YXMLS_comment3:
if(ch == (unsigned char)'-') {
x->state = YXMLS_comment4;
return YXML_OK;
}
- if(yxml_isChar(ch))
+ if(yxml_isChar(ch)) {
+ x->state = YXMLS_comment2;
return YXML_OK;
+ }
break;
case YXMLS_comment4:
- if(ch == (unsigned char)'-') {
- x->state = YXMLS_comment5;
- return YXML_OK;
- }
- if(yxml_isChar(ch)) {
- x->state = YXMLS_comment3;
- return YXML_OK;
- }
- break;
- case YXMLS_comment5:
if(ch == (unsigned char)'>') {
x->state = x->nextstate;
return YXML_OK;
diff --git a/yxml.c.in b/yxml.c.in
index 619240c..6a32a08 100644
--- a/yxml.c.in
+++ b/yxml.c.in
@@ -36,7 +36,6 @@
#define yxml_isNum(c) (c-'0' < 10)
#define yxml_isHex(c) (yxml_isNum(c) || (c|32)-'a' < 6)
#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || c == '.' || c == '_' || c == '-')
-#define yxml_isCommentStart(c) (yxml_isChar(c) && c != '-')
#define yxml_isNameStart(c) (yxml_isAlpha(c) || c == ':' || c == '_')
#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || c == '-' || c == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */