blob: d2b94ff9a7519bb964505c7c395cfbc81f9086b1 [file] [log] [blame]
/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */
/* Copyright (c) 2013 Yoran Heling
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <yxml.h>
#include <string.h>
typedef enum {
YXMLS_string,
YXMLS_attr0,
YXMLS_attr1,
YXMLS_attr2,
YXMLS_attr3,
YXMLS_attr4,
YXMLS_cd0,
YXMLS_cd1,
YXMLS_cd2,
YXMLS_comment0,
YXMLS_comment1,
YXMLS_comment2,
YXMLS_comment3,
YXMLS_comment4,
YXMLS_dt0,
YXMLS_elem0,
YXMLS_elem1,
YXMLS_elem2,
YXMLS_elem3,
YXMLS_enc0,
YXMLS_enc1,
YXMLS_enc2,
YXMLS_enc3,
YXMLS_etag0,
YXMLS_etag1,
YXMLS_etag2,
YXMLS_init,
YXMLS_le0,
YXMLS_le1,
YXMLS_le2,
YXMLS_lee1,
YXMLS_lee2,
YXMLS_leq0,
YXMLS_misc0,
YXMLS_misc1,
YXMLS_misc2,
YXMLS_misc2a,
YXMLS_pi0,
YXMLS_pi1,
YXMLS_pi2,
YXMLS_pi3,
YXMLS_std0,
YXMLS_std1,
YXMLS_std2,
YXMLS_std3,
YXMLS_ver0,
YXMLS_ver1,
YXMLS_ver2,
YXMLS_ver3,
YXMLS_xmldecl0,
YXMLS_xmldecl1,
YXMLS_xmldecl2,
YXMLS_xmldecl3,
YXMLS_xmldecl4,
YXMLS_xmldecl5,
YXMLS_xmldecl6,
YXMLS_xmldecl7
} yxml_state_t;
/* Character-class predicates used by the state machine in yxml_parse().
 * Every macro argument is parenthesized so that an expression argument keeps
 * its meaning. The range tests of the form "(c - lo) < n" convert c to
 * unsigned first: they only reject values below lo because the subtraction
 * wraps around, which would not happen with a signed argument. */
#define yxml_isChar(c) (c)
#define yxml_isSP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0d || (c) == 0x0a)
#define yxml_isAlpha(c) ((((unsigned)(c))|32)-'a' < 26)
#define yxml_isNum(c) (((unsigned)(c))-'0' < 10)
#define yxml_isHex(c) (yxml_isNum(c) || (((unsigned)(c))|32)-'a' < 6)
#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || (c) == '.' || (c) == '_' || (c) == '-')
#define yxml_isCommentStart(c) (yxml_isChar(c) && (c) != '-')
/* XML 1.0 allows ':' , '_' and letters as the first character of a name.
 * Only the ASCII subset is recognised here. */
#define yxml_isNameStart(c) (yxml_isAlpha(c) || (c) == ':' || (c) == '_')
#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || (c) == '-' || (c) == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
#define yxml_isAttValue(c) (yxml_isChar(c) && (c) != x->quote && (c) != '<' && (c) != '&')
/* Anything between '&' and ';', the yxml_ref* functions will do further
 * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but
 * this parser doesn't understand entities with '.', ':', etc, anyway. */
#define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || (c) == '#')
/* Store the byte value of ch in x->data and report YXML_DATA.
 * The value is first narrowed to unsigned char and then copied bytewise
 * instead of being assigned: plain char may be signed, and converting an
 * out-of-range value to a signed type is implementation-defined in C. */
static inline int yxml_setdata(yxml_t *x, unsigned ch) {
	const unsigned char byte = ch;
	memcpy(&x->data, &byte, sizeof byte);
	return YXML_DATA;
}
/* Select the state to continue in after a construct that may occur both
 * inside and outside of an element: misc2 while the element stack is
 * non-empty (x->stacklen != 0), misc1 otherwise. Always returns YXML_OK.
 * (This is a hack to work around a limitation in the state machine
 * description in yxml-states.) */
static inline int yxml_retmisc(yxml_t *x, unsigned ch) {
	(void)ch;
	if(x->stacklen)
		x->state = YXMLS_misc2;
	else
		x->state = YXMLS_misc1;
	return YXML_OK;
}
/* Begin a new element name on the stack with ch as its first character.
 * The new name is placed after the NUL byte at x->stack[x->stacklen], is
 * itself NUL-terminated, and x->elem is pointed at it.
 * Returns YXML_ESTACK (leaving the stack untouched) when fewer than three
 * bytes remain, YXML_OK otherwise. */
static int yxml_elemstart(yxml_t *x, unsigned ch) {
	if(x->stacklen+2 >= x->stacksize)
		return YXML_ESTACK;
	/* Step over the separator in front of the new name */
	x->stacklen++;
	x->elem = (char *)x->stack+x->stacklen;
	x->stack[x->stacklen++] = ch;
	x->stack[x->stacklen] = 0;
	return YXML_OK;
}
/* Append ch to the element name on top of the stack and keep it
 * NUL-terminated. Returns YXML_ESTACK when there is no room for the
 * character plus terminator, YXML_OK otherwise. */
static inline int yxml_elemname(yxml_t *x, unsigned ch) {
	if(x->stacklen+1 < x->stacksize) {
		x->stack[x->stacklen++] = ch;
		x->stack[x->stacklen] = 0;
		return YXML_OK;
	}
	return YXML_ESTACK;
}
/* Called when the element name of a start tag is complete; reports
 * YXML_OPEN. Neither argument is used. */
static inline int yxml_elemnameend(yxml_t *x, unsigned ch) {
	(void)x;
	(void)ch;
	return YXML_OPEN;
}
/* Remove the last element name from the stack. Also used by
 * yxml_elemcloseend().
 * Returns YXML_CLOSE with x->elem pointing at the name that is now on top,
 * or YXML_CLOSE | YXML_EOD with x->elem pointing at the start of the stack
 * when the stack has become empty. */
static int yxml_selfclose(yxml_t *x, unsigned ch) {
	char *name;
	(void)ch;
	/* Walk back to the NUL byte in front of the name being removed */
	while(x->stack[--x->stacklen])
		;
	if(!x->stacklen) {
		x->elem = (char *)x->stack;
		return YXML_CLOSE | YXML_EOD;
	}
	/* Find the first character of the previous name */
	name = (char *)x->stack+x->stacklen-1;
	while(name[-1])
		name--;
	x->elem = name;
	return YXML_CLOSE;
}
/* Compare one character of an end tag name against the open element name.
 * x->elem is used as the comparison cursor and advances on a match.
 * Returns YXML_OK on a match, YXML_ECLOSE otherwise. */
static inline int yxml_elemclose(yxml_t *x, unsigned ch) {
	const unsigned char expected = *(unsigned char *)x->elem;
	if(expected == ch) {
		x->elem++;
		return YXML_OK;
	}
	return YXML_ECLOSE;
}
/* Called when the name in an end tag is complete. If the cursor x->elem has
 * not reached the end of the open element name, the end tag was only a
 * prefix of it and YXML_ECLOSE is returned; otherwise the element is removed
 * from the stack via yxml_selfclose(). */
static inline int yxml_elemcloseend(yxml_t *x, unsigned ch) {
	return *x->elem ? YXML_ECLOSE : yxml_selfclose(x, ch);
}
/* Start a new attribute name in x->attr consisting of the single character
 * ch, NUL-terminated. Always returns YXML_OK. */
static inline int yxml_attrstart(yxml_t *x, unsigned ch) {
	x->attr[0] = ch;
	x->attr[1] = 0;
	x->attrlen = 1;
	return YXML_OK;
}
/* Append ch to the attribute name in x->attr and keep it NUL-terminated.
 * Returns YXML_EATTR once the name already holds YXML_MAX_ATTRNAME
 * characters, YXML_OK otherwise. */
static inline int yxml_attrname(yxml_t *x, unsigned ch) {
	if(x->attrlen < YXML_MAX_ATTRNAME) {
		x->attr[x->attrlen++] = ch;
		x->attr[x->attrlen] = 0;
		return YXML_OK;
	}
	return YXML_EATTR;
}
/* Called when an attribute name is complete; reports YXML_ATTR. Neither
 * argument is used. */
static inline int yxml_attrnameend(yxml_t *x, unsigned ch) {
	(void)x;
	(void)ch;
	return YXML_ATTR;
}
/* Called when a start tag has no (more) attributes; reports YXML_EOA.
 * Neither argument is used. */
static inline int yxml_attrsend(yxml_t *x, unsigned ch) {
	(void)x;
	(void)ch;
	return YXML_EOA;
}
/* Prepare for collecting a reference ("&...;"): clear the first
 * YXML_MAX_REF+1 bytes of x->ref and reset its length. Clearing the whole
 * buffer keeps the collected name NUL-terminated without further work in
 * yxml_ref(). Always returns YXML_OK. */
static inline int yxml_refstart(yxml_t *x, unsigned ch) {
	(void)ch;
	x->reflen = 0;
	memset(x->ref, 0, YXML_MAX_REF+1);
	return YXML_OK;
}
/* Append ch to the reference being collected in x->ref. Returns YXML_EREF
 * once YXML_MAX_REF characters have been stored, YXML_OK otherwise. */
static int yxml_ref(yxml_t *x, unsigned ch) {
	if(x->reflen < YXML_MAX_REF) {
		x->ref[x->reflen++] = ch;
		return YXML_OK;
	}
	return YXML_EREF;
}
/* Resolve the reference collected in x->ref (the text between '&' and ';')
 * and hand the resulting character to the application.
 *
 * Understood forms are decimal ("#NNN") and hexadecimal ("#xHHH") character
 * references and the five predefined entities lt, gt, amp, apos and quot.
 * The incoming value of ch is ignored; the parameter is reused as the
 * result.
 *
 * Returns YXML_DATA (via yxml_setdata()) on success, or YXML_EREF when the
 * reference is unknown, malformed, resolves to 0, or resolves to a value
 * above 127.
 *
 * The entity names are compared with strcmp(). This relies on x->ref being
 * NUL-terminated, which yxml_refstart() and yxml_ref() guarantee by zeroing
 * YXML_MAX_REF+1 bytes and storing at most YXML_MAX_REF characters. */
static int yxml_refend(yxml_t *x, unsigned ch) {
	static const struct {
		const char *name;
		unsigned char value;
	} entities[] = {
		{ "lt",   '<'  },
		{ "gt",   '>'  },
		{ "amp",  '&'  },
		{ "apos", '\'' },
		{ "quot", '"'  }
	};
	const unsigned char *r = x->ref;
	size_t i;
	ch = 0;
	if(*r == '#') {
		if(r[1] == 'x')
			for(r += 2; yxml_isHex((unsigned)*r); r++)
				ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10);
		else
			for(r++; yxml_isNum((unsigned)*r); r++)
				ch = (ch*10) + (*r-'0');
		/* Trailing garbage after the digits invalidates the reference */
		if(*r)
			ch = 0;
	} else {
		for(i = 0; i < sizeof entities / sizeof *entities; i++) {
			if(strcmp((const char *)r, entities[i].name) == 0) {
				ch = entities[i].value;
				break;
			}
		}
	}
	/* XXX: The API does not allow returning more than one byte at a time, so
	 * CharRefs only work for ASCII at the moment. This is kind of stupid. */
	if(!ch || ch > 127)
		return YXML_EREF;
	return yxml_setdata(x, ch);
}
/* Reset the parser object *x and attach the caller-provided buffer that
 * holds the stack of open element names.
 *
 * x         - parser state; every byte of it is cleared first.
 * stack     - buffer of stacksize bytes. Its first byte is written
 *             unconditionally, so it has to provide at least one byte.
 * stacksize - size of the buffer in bytes.
 *
 * Afterwards the line counter is 1, the element stack is empty, x->elem
 * points at the empty string at the start of the buffer and the state
 * machine is in YXMLS_init. */
void yxml_init(yxml_t *x, char *stack, size_t stacksize) {
	unsigned char *base = (unsigned char *)stack;
	memset(x, 0, sizeof(*x));
	x->state = YXMLS_init;
	x->line = 1;
	x->stack = base;
	x->stacksize = stacksize;
	x->elem = (char *)base;
	base[0] = 0;
}
/* Feed one byte of the document to the parser.
 *
 * _ch may be passed as a (possibly negative) char value or as 0..255; only
 * its low 8 bits are used.
 *
 * The position counters x->line, x->byte and x->total are updated first,
 * then one step of the state machine is executed for the current x->state.
 *
 * Returns YXML_OK when the byte was consumed and there is nothing to report,
 * the value produced by the helper called for the transition (for instance
 * YXML_OPEN, YXML_ATTR, YXML_DATA or YXML_CLOSE, or one of their error
 * codes), or YXML_ESYN when the byte is not acceptable in the current
 * state. */
yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
	/* Ensure that characters are in the range of 0..255 rather than
	 * -128..127. All character comparisons are done with positive integers.
	 * The conversion is done in unsigned arithmetic so that no value of _ch
	 * can cause signed overflow. */
	unsigned ch = (unsigned)_ch & 0xff;
	/* TODO: Validate UTF-8 correctness? */
	if(ch == '\n') {
		x->line++;
		x->byte = 0;
	}
	x->byte++;
	x->total++;
	switch((yxml_state_t)x->state) {
	/* Remaining characters of a fixed keyword */
	case YXMLS_string:
		if(ch == *x->string) {
			x->string++;
			if(!*x->string)
				x->state = x->stringstate;
			return YXML_OK;
		}
		break;
	/* Attribute name */
	case YXMLS_attr0:
		if(yxml_isName(ch))
			return yxml_attrname(x, ch);
		if(yxml_isSP(ch)) {
			x->state = YXMLS_attr1;
			return yxml_attrnameend(x, ch);
		}
		if(ch == (unsigned char)'=') {
			x->state = YXMLS_attr2;
			return yxml_attrnameend(x, ch);
		}
		break;
	/* Whitespace between attribute name and '=' */
	case YXMLS_attr1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'=') {
			x->state = YXMLS_attr2;
			return YXML_OK;
		}
		break;
	/* After '=', waiting for the opening quote */
	case YXMLS_attr2:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
			x->state = YXMLS_attr3;
			x->quote = ch;
			return YXML_OK;
		}
		break;
	/* Attribute value */
	case YXMLS_attr3:
		if(yxml_isAttValue(ch))
			return yxml_setdata(x, ch);
		if(ch == (unsigned char)'&') {
			x->state = YXMLS_attr4;
			return yxml_refstart(x, ch);
		}
		if(x->quote == ch) {
			x->state = YXMLS_elem2;
			return YXML_OK;
		}
		break;
	/* Reference inside an attribute value */
	case YXMLS_attr4:
		if(yxml_isRef(ch))
			return yxml_ref(x, ch);
		if(ch == (unsigned char)'\x3b') {
			x->state = YXMLS_attr3;
			return yxml_refend(x, ch);
		}
		break;
	/* CDATA content */
	case YXMLS_cd0:
		if(ch == (unsigned char)']') {
			x->state = YXMLS_cd1;
			return YXML_OK;
		}
		if(yxml_isChar(ch))
			return yxml_setdata(x, ch);
		break;
	/* CDATA content, one ']' seen.
	 * NOTE(review): when another character follows, only that character is
	 * reported; the ']' consumed in cd0 is not passed on as data. */
	case YXMLS_cd1:
		if(ch == (unsigned char)']') {
			x->state = YXMLS_cd2;
			return YXML_OK;
		}
		if(yxml_isChar(ch)) {
			x->state = YXMLS_cd0;
			return yxml_setdata(x, ch);
		}
		break;
	/* CDATA content, "]]" seen; only '>' is accepted */
	case YXMLS_cd2:
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return YXML_OK;
		}
		break;
	/* "<!-" seen, expecting the second '-' */
	case YXMLS_comment0:
		if(ch == (unsigned char)'-') {
			x->state = YXMLS_comment1;
			return YXML_OK;
		}
		break;
	/* First character of the comment body, must not be '-' */
	case YXMLS_comment1:
		if(yxml_isCommentStart(ch)) {
			x->state = YXMLS_comment2;
			return YXML_OK;
		}
		break;
	/* Comment body */
	case YXMLS_comment2:
		if(ch == (unsigned char)'-') {
			x->state = YXMLS_comment3;
			return YXML_OK;
		}
		if(yxml_isChar(ch))
			return YXML_OK;
		break;
	/* Comment body, one '-' seen */
	case YXMLS_comment3:
		if(ch == (unsigned char)'-') {
			x->state = YXMLS_comment4;
			return YXML_OK;
		}
		if(yxml_isChar(ch)) {
			x->state = YXMLS_comment2;
			return YXML_OK;
		}
		break;
	/* Comment body, "--" seen; only '>' is accepted */
	case YXMLS_comment4:
		if(ch == (unsigned char)'>')
			return yxml_retmisc(x, ch);
		break;
	/* Inside <!DOCTYPE, everything up to the first '>' is skipped */
	case YXMLS_dt0:
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc1;
			return YXML_OK;
		}
		if(yxml_isChar(ch))
			return YXML_OK;
		break;
	/* Element name of a start tag */
	case YXMLS_elem0:
		if(yxml_isName(ch))
			return yxml_elemname(x, ch);
		if(yxml_isSP(ch)) {
			x->state = YXMLS_elem1;
			return yxml_elemnameend(x, ch);
		}
		if(ch == (unsigned char)'/') {
			x->state = YXMLS_elem3;
			return yxml_elemnameend(x, ch)|yxml_attrsend(x, ch);
		}
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return yxml_elemnameend(x, ch)|yxml_attrsend(x, ch);
		}
		break;
	/* Whitespace inside a start tag: next attribute or end of tag */
	case YXMLS_elem1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'/') {
			x->state = YXMLS_elem3;
			return yxml_attrsend(x, ch);
		}
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return yxml_attrsend(x, ch);
		}
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_attr0;
			return yxml_attrstart(x, ch);
		}
		break;
	/* Directly after the closing quote of an attribute value */
	case YXMLS_elem2:
		if(yxml_isSP(ch)) {
			x->state = YXMLS_elem1;
			return YXML_OK;
		}
		if(ch == (unsigned char)'/') {
			x->state = YXMLS_elem3;
			return yxml_attrsend(x, ch);
		}
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return yxml_attrsend(x, ch);
		}
		break;
	/* '/' of a self-closing tag seen, expecting '>' */
	case YXMLS_elem3:
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return yxml_selfclose(x, ch);
		}
		break;
	/* "encoding" seen, expecting '=' */
	case YXMLS_enc0:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'=') {
			x->state = YXMLS_enc1;
			return YXML_OK;
		}
		break;
	/* Expecting the opening quote of the encoding name */
	case YXMLS_enc1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
			x->state = YXMLS_enc2;
			x->quote = ch;
			return YXML_OK;
		}
		break;
	/* First character of the encoding name */
	case YXMLS_enc2:
		if(yxml_isAlpha(ch)) {
			x->state = YXMLS_enc3;
			return YXML_OK;
		}
		break;
	/* Rest of the encoding name up to the closing quote */
	case YXMLS_enc3:
		if(yxml_isEncName(ch))
			return YXML_OK;
		if(x->quote == ch) {
			x->state = YXMLS_xmldecl4;
			return YXML_OK;
		}
		break;
	/* "</" seen, first character of the end tag name */
	case YXMLS_etag0:
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_etag1;
			return yxml_elemclose(x, ch);
		}
		break;
	/* End tag name */
	case YXMLS_etag1:
		if(yxml_isName(ch))
			return yxml_elemclose(x, ch);
		if(yxml_isSP(ch)) {
			x->state = YXMLS_etag2;
			return yxml_elemcloseend(x, ch);
		}
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return yxml_elemcloseend(x, ch);
		}
		break;
	/* Whitespace after the end tag name */
	case YXMLS_etag2:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc2;
			return YXML_OK;
		}
		break;
	/* Very first byte; 0xEF starts the byte sequence EF BB BF */
	case YXMLS_init:
		if(ch == (unsigned char)'\xef') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_misc0;
			x->string = (unsigned char *)"\xbb\xbf";
			return YXML_OK;
		}
		if(yxml_isSP(ch)) {
			x->state = YXMLS_misc0;
			return YXML_OK;
		}
		if(ch == (unsigned char)'<') {
			x->state = YXMLS_le0;
			return YXML_OK;
		}
		break;
	/* '<' seen from init/misc0 */
	case YXMLS_le0:
		if(ch == (unsigned char)'!') {
			x->state = YXMLS_lee1;
			return YXML_OK;
		}
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_leq0;
			return YXML_OK;
		}
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_elem0;
			return yxml_elemstart(x, ch);
		}
		break;
	/* '<' seen from misc1 */
	case YXMLS_le1:
		if(ch == (unsigned char)'!') {
			x->state = YXMLS_lee1;
			return YXML_OK;
		}
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_pi0;
			return YXML_OK;
		}
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_elem0;
			return yxml_elemstart(x, ch);
		}
		break;
	/* '<' seen from misc2; this is the only place an end tag may start */
	case YXMLS_le2:
		if(ch == (unsigned char)'!') {
			x->state = YXMLS_lee2;
			return YXML_OK;
		}
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_pi0;
			return YXML_OK;
		}
		if(ch == (unsigned char)'/') {
			x->state = YXMLS_etag0;
			return YXML_OK;
		}
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_elem0;
			return yxml_elemstart(x, ch);
		}
		break;
	/* "<!" seen from le0/le1: comment or DOCTYPE */
	case YXMLS_lee1:
		if(ch == (unsigned char)'-') {
			x->state = YXMLS_comment0;
			return YXML_OK;
		}
		if(ch == (unsigned char)'D') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_dt0;
			x->string = (unsigned char *)"OCTYPE";
			return YXML_OK;
		}
		break;
	/* "<!" seen from le2: comment or CDATA section */
	case YXMLS_lee2:
		if(ch == (unsigned char)'-') {
			x->state = YXMLS_comment0;
			return YXML_OK;
		}
		if(ch == (unsigned char)'[') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_cd0;
			x->string = (unsigned char *)"CDATA[";
			return YXML_OK;
		}
		break;
	/* "<?" seen from le0: XML declaration or processing instruction */
	case YXMLS_leq0:
		if(ch == (unsigned char)'x') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_xmldecl0;
			x->string = (unsigned char *)"ml";
			return YXML_OK;
		}
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_pi1;
			return YXML_OK;
		}
		break;
	/* Whitespace before the first markup */
	case YXMLS_misc0:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'<') {
			x->state = YXMLS_le0;
			return YXML_OK;
		}
		break;
	/* Whitespace between markup outside of any element */
	case YXMLS_misc1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'<') {
			x->state = YXMLS_le1;
			return YXML_OK;
		}
		break;
	/* Character data */
	case YXMLS_misc2:
		if(ch == (unsigned char)'<') {
			x->state = YXMLS_le2;
			return YXML_OK;
		}
		if(ch == (unsigned char)'&') {
			x->state = YXMLS_misc2a;
			return yxml_refstart(x, ch);
		}
		if(yxml_isChar(ch))
			return yxml_setdata(x, ch);
		break;
	/* Reference inside character data */
	case YXMLS_misc2a:
		if(yxml_isRef(ch))
			return yxml_ref(x, ch);
		if(ch == (unsigned char)'\x3b') {
			x->state = YXMLS_misc2;
			return yxml_refend(x, ch);
		}
		break;
	/* "<?" seen, first character of the target name */
	case YXMLS_pi0:
		if(yxml_isNameStart(ch)) {
			x->state = YXMLS_pi1;
			return YXML_OK;
		}
		break;
	/* Processing instruction target name */
	case YXMLS_pi1:
		if(yxml_isName(ch))
			return YXML_OK;
		if(yxml_isSP(ch)) {
			x->state = YXMLS_pi2;
			return YXML_OK;
		}
		break;
	/* Processing instruction body */
	case YXMLS_pi2:
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_pi3;
			return YXML_OK;
		}
		if(yxml_isChar(ch))
			return YXML_OK;
		break;
	/* Processing instruction body, '?' seen */
	case YXMLS_pi3:
		if(ch == (unsigned char)'>')
			return yxml_retmisc(x, ch);
		if(yxml_isChar(ch)) {
			x->state = YXMLS_pi2;
			return YXML_OK;
		}
		break;
	/* "standalone" seen, expecting '=' */
	case YXMLS_std0:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'=') {
			x->state = YXMLS_std1;
			return YXML_OK;
		}
		break;
	/* Expecting the opening quote of the standalone value */
	case YXMLS_std1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
			x->state = YXMLS_std2;
			x->quote = ch;
			return YXML_OK;
		}
		break;
	/* Standalone value: "yes" or "no" */
	case YXMLS_std2:
		if(ch == (unsigned char)'y') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_std3;
			x->string = (unsigned char *)"es";
			return YXML_OK;
		}
		if(ch == (unsigned char)'n') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_std3;
			x->string = (unsigned char *)"o";
			return YXML_OK;
		}
		break;
	/* Closing quote of the standalone value */
	case YXMLS_std3:
		if(x->quote == ch) {
			x->state = YXMLS_xmldecl6;
			return YXML_OK;
		}
		break;
	/* "version" seen, expecting '=' */
	case YXMLS_ver0:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'=') {
			x->state = YXMLS_ver1;
			return YXML_OK;
		}
		break;
	/* Expecting the opening quote, which must be followed by "1." */
	case YXMLS_ver1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
			x->state = YXMLS_string;
			x->quote = ch;
			x->stringstate = YXMLS_ver2;
			x->string = (unsigned char *)"1.";
			return YXML_OK;
		}
		break;
	/* First digit after "1." */
	case YXMLS_ver2:
		if(yxml_isNum(ch)) {
			x->state = YXMLS_ver3;
			return YXML_OK;
		}
		break;
	/* Further digits, up to the closing quote */
	case YXMLS_ver3:
		if(yxml_isNum(ch))
			return YXML_OK;
		if(x->quote == ch) {
			x->state = YXMLS_xmldecl2;
			return YXML_OK;
		}
		break;
	/* "<?xml" seen, whitespace is required */
	case YXMLS_xmldecl0:
		if(yxml_isSP(ch)) {
			x->state = YXMLS_xmldecl1;
			return YXML_OK;
		}
		break;
	/* Expecting "version" */
	case YXMLS_xmldecl1:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'v') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_ver0;
			x->string = (unsigned char *)"ersion";
			return YXML_OK;
		}
		break;
	/* Directly after the version value */
	case YXMLS_xmldecl2:
		if(yxml_isSP(ch)) {
			x->state = YXMLS_xmldecl3;
			return YXML_OK;
		}
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_xmldecl7;
			return YXML_OK;
		}
		break;
	/* After the version value and whitespace: "encoding", "standalone" or
	 * the end of the declaration */
	case YXMLS_xmldecl3:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_xmldecl7;
			return YXML_OK;
		}
		if(ch == (unsigned char)'e') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_enc0;
			x->string = (unsigned char *)"ncoding";
			return YXML_OK;
		}
		/* The whole keyword has to be matched before std0, exactly as in
		 * xmldecl5; std0 itself only accepts whitespace and '='. */
		if(ch == (unsigned char)'s') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_std0;
			x->string = (unsigned char *)"tandalone";
			return YXML_OK;
		}
		break;
	/* Directly after the encoding value */
	case YXMLS_xmldecl4:
		if(yxml_isSP(ch)) {
			x->state = YXMLS_xmldecl5;
			return YXML_OK;
		}
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_xmldecl7;
			return YXML_OK;
		}
		break;
	/* After the encoding value and whitespace: "standalone" or the end of
	 * the declaration */
	case YXMLS_xmldecl5:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_xmldecl7;
			return YXML_OK;
		}
		if(ch == (unsigned char)'s') {
			x->state = YXMLS_string;
			x->stringstate = YXMLS_std0;
			x->string = (unsigned char *)"tandalone";
			return YXML_OK;
		}
		break;
	/* After the standalone value */
	case YXMLS_xmldecl6:
		if(yxml_isSP(ch))
			return YXML_OK;
		if(ch == (unsigned char)'?') {
			x->state = YXMLS_xmldecl7;
			return YXML_OK;
		}
		break;
	/* '?' seen, expecting the '>' that ends the declaration */
	case YXMLS_xmldecl7:
		if(ch == (unsigned char)'>') {
			x->state = YXMLS_misc1;
			return YXML_OK;
		}
		break;
	}
	/* No transition matched the byte in the current state */
	return YXML_ESYN;
}
/* vim: set noet sw=4 ts=4: */