/* blob: 1c67819703015aefbcd45a78a645a18bd0d53971 */
/*
* html.c: a libFuzzer target to test several HTML parser interfaces.
*
* See Copyright for the status of this software.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/catalog.h>
#include "fuzz.h"
/*
 * LLVMFuzzerInitialize:
 * @argc:  unused
 * @argv:  unused
 *
 * One-time setup hook for the fuzz target. Sets up the fuzzing memory
 * hooks, initializes the parser and, when catalog support is compiled
 * in, initializes the catalog and sets its defaults to
 * XML_CATA_ALLOW_NONE.
 *
 * Returns 0.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    /* Memory hooks are installed before the library is initialized. */
    xmlFuzzMemSetup();
    xmlInitParser();

#if defined(LIBXML_CATALOG_ENABLED)
    /* Initialize the catalog, then restrict its defaults. */
    xmlInitializeCatalog();
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
#endif

    return(0);
}
/*
 * LLVMFuzzerTestOneInput:
 * @data:  raw fuzzer input
 * @size:  number of bytes in @data
 *
 * Runs a single fuzzer input through the HTML parser.
 *
 * The input starts with three 4-byte integers read via xmlFuzzReadInt:
 * the parser options, the position handed to xmlFuzzInjectFailure and
 * the maximum chunk size used by the push parser. Whatever remains is
 * the document itself.
 *
 * The document is parsed with the pull parser (htmlCtxtReadMemory),
 * then serialized and copied. If LIBXML_PUSH_ENABLED is defined, it is
 * parsed again with the push parser (htmlParseChunk) and the process
 * is aborted if the two parsers disagree on success or, when output
 * support is available, on the serialized result.
 *
 * Returns 0 (unless a mismatch triggers abort()).
 */
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
    xmlParserCtxtPtr ctxt;
    htmlDocPtr doc;
    const char *docBuffer;
    size_t failurePos, docSize, maxChunkSize;
    int opts, errorCode;
#ifdef LIBXML_OUTPUT_ENABLED
    /* Serialization of the pull parser result, kept for comparison. */
    xmlOutputBufferPtr out = NULL;
#endif

    xmlFuzzDataInit(data, size);
    opts = (int) xmlFuzzReadInt(4);
    failurePos = xmlFuzzReadInt(4) % (size + 100);

    /* The chunk size is bounded by the input size and is never zero. */
    maxChunkSize = xmlFuzzReadInt(4) % (size + size / 8 + 1);
    if (maxChunkSize == 0)
        maxChunkSize = 1;

    docBuffer = xmlFuzzReadRemaining(&docSize);
    if (docBuffer == NULL) {
        xmlFuzzDataCleanup();
        return(0);
    }

    /* Pull parser */

    xmlFuzzInjectFailure(failurePos);
    ctxt = htmlNewParserCtxt();
    if (ctxt == NULL) {
        /* No context means no errNo to read; record the failure here. */
        errorCode = XML_ERR_NO_MEMORY;
    } else {
        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
        doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts);
        errorCode = ctxt->errNo;
        xmlFuzzCheckFailureReport("htmlCtxtReadMemory",
                                  errorCode == XML_ERR_NO_MEMORY,
                                  errorCode == XML_IO_EIO);

        if (doc != NULL) {
            xmlDocPtr copy;

#ifdef LIBXML_OUTPUT_ENABLED
            const xmlChar *content;

            /*
             * Also test the serializer. Call htmlDocContentDumpOutput with our
             * own buffer to avoid encoding the output. The HTML encoding is
             * excruciatingly slow (see htmlEntityValueLookup).
             */
            out = xmlAllocOutputBuffer(NULL);
            htmlDocContentDumpOutput(out, doc, NULL);
            content = xmlOutputBufferGetContent(out);
            xmlFuzzCheckFailureReport("htmlDocContentDumpOutput",
                                      content == NULL, 0);
            if (content == NULL) {
                /* Nothing to compare against later; drop the buffer. */
                xmlOutputBufferClose(out);
                out = NULL;
            }
#endif

            copy = xmlCopyDoc(doc, 1);
            /* Label the report with the function that was actually called. */
            xmlFuzzCheckFailureReport("xmlCopyDoc", copy == NULL, 0);
            xmlFreeDoc(copy);

            xmlFreeDoc(doc);
        }

        htmlFreeParserCtxt(ctxt);
    }

    /* Push parser */

#ifdef LIBXML_PUSH_ENABLED
    /* Use the same failure position as for the pull parser run. */
    xmlFuzzInjectFailure(failurePos);
    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
                                    XML_CHAR_ENCODING_NONE);

    if (ctxt != NULL) {
        size_t consumed;
        int errorCodePush, numChunks, maxChunks;

        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
        htmlCtxtUseOptions(ctxt, opts);

        consumed = 0;
        numChunks = 0;
        /* Upper bound on the number of htmlParseChunk calls. */
        maxChunks = 50 + docSize / 100;

        /*
         * Always feed at least one chunk, even for an empty document.
         * The final chunk takes all remaining input and sets terminate;
         * it is forced once the chunk budget is used up.
         */
        while (numChunks == 0 ||
               (consumed < docSize && numChunks < maxChunks)) {
            size_t chunkSize;
            int terminate;

            numChunks += 1;
            chunkSize = docSize - consumed;

            if (numChunks < maxChunks && chunkSize > maxChunkSize) {
                chunkSize = maxChunkSize;
                terminate = 0;
            } else {
                terminate = 1;
            }

            htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, terminate);
            consumed += chunkSize;
        }

        errorCodePush = ctxt->errNo;
        xmlFuzzCheckFailureReport("htmlParseChunk",
                                  errorCodePush == XML_ERR_NO_MEMORY,
                                  errorCodePush == XML_IO_EIO);
        doc = ctxt->myDoc;

        /*
         * Push and pull parser differ in when exactly they
         * stop parsing, and the error code is the *last* error
         * reported, so we can't check whether the codes match.
         *
         * Only check that both agree on success versus failure, and
         * skip the check if either run hit a memory or I/O error.
         */
        if (errorCode != XML_ERR_NO_MEMORY &&
            errorCode != XML_IO_EIO &&
            errorCodePush != XML_ERR_NO_MEMORY &&
            errorCodePush != XML_IO_EIO &&
            (errorCode == XML_ERR_OK) != (errorCodePush == XML_ERR_OK)) {
            fprintf(stderr, "pull/push parser error mismatch: %d != %d\n",
                    errorCode, errorCodePush);
#if 0
            FILE *f = fopen("c.html", "wb");
            fwrite(docBuffer, docSize, 1, f);
            fclose(f);
            fprintf(stderr, "opts: %X\n", opts);
#endif
            abort();
        }

#ifdef LIBXML_OUTPUT_ENABLED
        /*
         * Verify that pull and push parser produce the same result.
         *
         * The NOBLANKS option doesn't work reliably in push mode.
         */
        if ((opts & XML_PARSE_NOBLANKS) == 0 &&
            errorCode == XML_ERR_OK &&
            errorCodePush == XML_ERR_OK &&
            out != NULL) {
            xmlOutputBufferPtr outPush;
            const xmlChar *content, *contentPush;

            outPush = xmlAllocOutputBuffer(NULL);
            htmlDocContentDumpOutput(outPush, doc, NULL);
            content = xmlOutputBufferGetContent(out);
            contentPush = xmlOutputBufferGetContent(outPush);

            /* Compare only if both serializations are available. */
            if (content != NULL && contentPush != NULL) {
                size_t outSize = xmlOutputBufferGetSize(out);

                if (outSize != xmlOutputBufferGetSize(outPush) ||
                    memcmp(content, contentPush, outSize) != 0) {
                    fprintf(stderr, "pull/push parser roundtrip "
                            "mismatch\n");
#if 0
                    FILE *f = fopen("c.html", "wb");
                    fwrite(docBuffer, docSize, 1, f);
                    fclose(f);
                    fprintf(stderr, "opts: %X\n", opts);
                    fprintf(stderr, "---\n%s\n---\n%s\n---\n",
                            xmlOutputBufferGetContent(out),
                            xmlOutputBufferGetContent(outPush));
#endif
                    abort();
                }
            }

            xmlOutputBufferClose(outPush);
        }
#endif

        /* The document built by the push parser is freed separately. */
        xmlFreeDoc(doc);
        htmlFreeParserCtxt(ctxt);
    }
#endif

    /* Cleanup */

#ifdef LIBXML_OUTPUT_ENABLED
    /* out may be NULL here if no document was serialized. */
    xmlOutputBufferClose(out);
#endif

    /* NOTE(review): position 0 presumably disables injection - confirm. */
    xmlFuzzInjectFailure(0);
    xmlFuzzDataCleanup();
    xmlResetLastError();

    return(0);
}
/*
 * LLVMFuzzerCustomMutator:
 * @data:  input buffer, mutated in place
 * @size:  current number of bytes in @data
 * @maxSize:  capacity of @data
 * @seed:  seed supplied by the fuzzing engine
 *
 * Custom mutator hook. Delegates to xmlFuzzMutateChunks with a table
 * describing the fixed-size integers at the start of the input and
 * with LLVMFuzzerMutate as the underlying mutator.
 *
 * Returns the value returned by xmlFuzzMutateChunks.
 */
size_t
LLVMFuzzerCustomMutator(char *data, size_t size, size_t maxSize,
                        unsigned seed) {
    /*
     * One entry per leading 4-byte field; the second member is
     * presumably the mutation probability and { 0, 0 } presumably
     * terminates the table - confirm against fuzz.h.
     *
     * NOTE(review): LLVMFuzzerTestOneInput reads a third 4-byte integer
     * (maxChunkSize) that has no entry here - confirm this is intended.
     */
    static const xmlFuzzChunkDesc headerFields[] = {
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* opts */
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* failurePos */
        { 0, 0 }
    };
    size_t mutatedSize;

    mutatedSize = xmlFuzzMutateChunks(headerFields, data, size, maxSize,
                                      seed, LLVMFuzzerMutate);
    return mutatedSize;
}