#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <fcntl.h>
/* Declarations for read() and close(). */
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
| |
| #ifdef _MSC_VER |
| #define XMLTOKAPI __declspec(dllimport) |
| #endif |
| #include "xmltok.h" |
| |
| #ifndef O_BINARY |
| #ifdef _O_BINARY |
| #define O_BINARY _O_BINARY |
| #else |
| #define O_BINARY 0 |
| #endif |
| #endif |
| |
| static void outOfMemory(); |
| |
| struct XmlTokBuffer { /* Read buffer plus tokenizer state for one input descriptor. */ |
| char *buf; /* start of the malloc'd buffer */ |
| char *end; /* one past the last byte read() has stored in buf */ |
| const char *ptr; /* where the next XmlTok() scan begins; bytes in [ptr, end) are not yet tokenized */ |
| size_t size; /* allocated size of buf in bytes */ |
| int fd; /* descriptor that read() pulls input from */ |
| int state; /* scanner state handed to XmlTok(): XML_PROLOG_STATE at first, XML_CONTENT_STATE once XML_TOK_INSTANCE_START is seen */ |
| int eof; /* set to 1 after read() has returned 0 */ |
| unsigned long endOffset; /* running total of bytes read; the input offset that corresponds to end */ |
| const ENCODING *enc; /* encoding passed to XmlTok(); set up through XmlInitEncoding() */ |
| INIT_ENCODING initEnc; /* storage handed to XmlInitEncoding() alongside enc */ |
| }; |
| |
/* Input offset of tb->ptr: endOffset is the offset of tb->end, so back up by
   the number of buffered bytes that have not been consumed yet.  The pointer
   difference is converted to unsigned long so the result always has the type
   of endOffset (and matches "%lu"), even where ptrdiff_t is wider than long. */
#define XmlTokBufferOffset(tb) \
  ((tb)->endOffset - (unsigned long)((tb)->end - (tb)->ptr))

/* Initial size of the read buffer, and the step by which it grows. */
#define READSIZE 1024
| |
| void XmlTokBufferInit(struct XmlTokBuffer *tb, int fd) |
| { |
| tb->buf = malloc(READSIZE); |
| if (!tb->buf) |
| outOfMemory(); |
| tb->end = tb->buf; |
| tb->ptr = tb->buf; |
| tb->size = READSIZE; |
| tb->fd = fd; |
| tb->state = XML_PROLOG_STATE; |
| tb->eof = 0; |
| tb->endOffset = 0; |
| XmlInitEncoding(&(tb->initEnc), &(tb->enc)); |
| } |
| |
| void XmlTokBufferFree(struct XmlTokBuffer *tb) |
| { |
| free(tb->buf); |
| } |
| |
| int XmlGetToken(struct XmlTokBuffer *tb, const char **tokStart, size_t *tokLength) |
| { |
| int tok; |
| for (;;) { |
| int nBytes; |
| const char *start = tb->ptr; |
| tok = XmlTok(tb->enc, tb->state, start, tb->end, &tb->ptr); |
| if (tok >= 0) { |
| if (tok == XML_TOK_INSTANCE_START) |
| tb->state = XML_CONTENT_STATE; |
| *tokStart = start; |
| *tokLength = tb->ptr - start; |
| break; |
| } |
| if (tb->eof) |
| break; |
| /* Read in multiples of READSIZE, unless read() has previously |
| less than it could. */ |
| if (tb->end == tb->buf + tb->size) { |
| size_t keep = tb->end - tb->ptr; |
| if (keep == 0) |
| tb->ptr = tb->end = tb->buf; |
| else if (keep + READSIZE <= tb->size) { |
| unsigned nBlocks = (tb->size - keep)/READSIZE; |
| char *newEnd = tb->buf + tb->size - (nBlocks * READSIZE); |
| memmove(newEnd - keep, tb->ptr, keep); |
| tb->end = newEnd; |
| tb->ptr = newEnd - keep; |
| } |
| else { |
| char *newBuf, *newEnd; |
| tb->size += READSIZE; |
| newBuf = malloc(tb->size); |
| if (!newBuf) |
| outOfMemory(); |
| newEnd = newBuf + tb->size - READSIZE; |
| memcpy(newEnd - keep, tb->ptr, keep); |
| free(tb->buf); |
| tb->buf = newBuf; |
| tb->end = newEnd; |
| tb->ptr = newEnd - keep; |
| } |
| } |
| nBytes = read(tb->fd, tb->end, (tb->buf + tb->size) - tb->end); |
| if (nBytes == 0) { |
| tb->eof = 1; |
| break; |
| } |
| if (nBytes < 0) |
| return XML_TOK_NONE; |
| tb->end += nBytes; |
| tb->endOffset += nBytes; |
| } |
| return tok; |
| } |
| |
| int doFile(const char *filename) |
| { |
| unsigned nElements = 0; |
| struct XmlTokBuffer buf; |
| int fd = open(filename, O_RDONLY|O_BINARY); |
| |
| if (fd < 0) { |
| fprintf(stderr, "%s: cannot open\n", filename); |
| return 0; |
| } |
| XmlTokBufferInit(&buf, fd); |
| for (;;) { |
| const char *tokStr; |
| size_t tokLen; |
| int tok = XmlGetToken(&buf, &tokStr, &tokLen); |
| switch (tok) { |
| case XML_TOK_NONE: |
| if (nElements == 0) { |
| fprintf(stderr, "%s: no instance\n", filename); |
| return 0; |
| } |
| printf("%8u %s\n", nElements, filename); |
| close(fd); |
| XmlTokBufferFree(&buf); |
| return 1; |
| case XML_TOK_INVALID: |
| fprintf(stderr, "%s: well-formedness error at byte %lu\n", |
| filename, XmlTokBufferOffset(&buf)); |
| |
| close(fd); |
| XmlTokBufferFree(&buf); |
| return 0; |
| case XML_TOK_PARTIAL_CHAR: |
| fprintf(stderr, "%s: unclosed token with partial character started at byte %lu\n", |
| filename, XmlTokBufferOffset(&buf)); |
| close(fd); |
| XmlTokBufferFree(&buf); |
| return 0; |
| case XML_TOK_PARTIAL: |
| fprintf(stderr, "%s: unclosed token started at byte %lu\n", |
| filename, XmlTokBufferOffset(&buf)); |
| close(fd); |
| XmlTokBufferFree(&buf); |
| return 0; |
| case XML_TOK_START_TAG_WITH_ATTS: |
| case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: |
| case XML_TOK_START_TAG_NO_ATTS: |
| case XML_TOK_EMPTY_ELEMENT_NO_ATTS: |
| nElements++; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| } |
| |
/* Run doFile() on every command-line argument, in order.  Exits with 0 when
   all files were processed successfully and with 1 when any of them failed
   or when no file name was given. */
int main(int argc, char **argv)
{
  int status = 0;
  char **arg;

  if (argc == 1) {
    fprintf(stderr, "usage: %s filename ...\n", argv[0]);
    return 1;
  }

  /* Keep going after a failure so that every file gets reported. */
  for (arg = argv + 1; arg < argv + argc; arg++) {
    if (!doFile(*arg))
      status = 1;
  }
  return status;
}
| |
/* Report an allocation failure on stderr and terminate the process with
   exit status 1.  Does not return. */
static
void outOfMemory()
{
  fputs("out of memory\n", stderr);
  exit(1);
}
| |