/* blob: a2bd97a383493fd8b29ff025e7567cdf509a7e7e [file] [log] [blame] */
/*
* html.c: a libFuzzer target to test several HTML parser interfaces.
*
* See Copyright for the status of this software.
*/
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/catalog.h>
#include "fuzz.h"
/*
 * LLVMFuzzerInitialize: set-up entry point of this fuzz target.
 *
 * argc, argv: command line of the fuzzer; neither is used here.
 *
 * Prepares the fuzzing memory hooks, initializes the parser library
 * (and the catalog subsystem when it is compiled in) and installs
 * xmlFuzzErrorFunc as the generic error handler.
 *
 * Always returns 0.
 */
int
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
                     char ***argv ATTRIBUTE_UNUSED) {
    /* The memory hooks are installed before the library is initialized. */
    xmlFuzzMemSetup();
    xmlInitParser();

#ifdef LIBXML_CATALOG_ENABLED
    /* Only available when catalog support is part of the build. */
    xmlInitializeCatalog();
#endif

    /* Route generic error reports through the fuzzing error callback. */
    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);

    return(0);
}
/*
 * LLVMFuzzerTestOneInput: feed one fuzz input to the HTML parser interfaces.
 *
 * data: raw fuzz input
 * size: number of bytes in data
 *
 * The input is split into two values obtained with xmlFuzzReadInt(4)
 * (parser options and an allocation limit) followed by the document,
 * which is everything that remains. The document is parsed once with
 * htmlReadMemory and serialized, then parsed a second time through the
 * push interface in pieces of at most 128 bytes.
 *
 * Always returns 0.
 */
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
    static const size_t chunkLimit = 128;
    const char *html;
    size_t htmlLen, memLimit, offset, step;
    int parseOpts;
    htmlDocPtr tree;
    xmlOutputBufferPtr sink;
    htmlParserCtxtPtr pushCtxt;

    xmlFuzzDataInit(data, size);

    parseOpts = (int) xmlFuzzReadInt(4);
    /* The modulo keeps the limit within [0, size]. */
    memLimit = xmlFuzzReadInt(4) % (size + 1);

    html = xmlFuzzReadRemaining(&htmlLen);
    if (html == NULL) {
        /* No document to parse: release the fuzz data and stop here. */
        xmlFuzzDataCleanup();
        return(0);
    }

    /*
     * Stage 1: pull parser.
     */
    xmlFuzzMemSetLimit(memLimit);
    tree = htmlReadMemory(html, htmlLen, NULL, NULL, parseOpts);

    /*
     * Exercise the serializer as well. htmlDocContentDumpOutput is called
     * with a buffer of our own so that the output is not encoded; the
     * HTML encoding is excruciatingly slow (see htmlEntityValueLookup).
     */
    sink = xmlAllocOutputBuffer(NULL);
    htmlDocContentDumpOutput(sink, tree, NULL);
    xmlOutputBufferClose(sink);

    xmlFreeDoc(tree);

    /*
     * Stage 2: push parser, run under the same allocation limit.
     */
    xmlFuzzMemSetLimit(memLimit);
    pushCtxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
                                        XML_CHAR_ENCODING_NONE);

    if (pushCtxt != NULL) {
        htmlCtxtUseOptions(pushCtxt, parseOpts);

        /* Hand the document over piece by piece. */
        offset = 0;
        while (offset < htmlLen) {
            step = htmlLen - offset;
            step = (step > chunkLimit) ? chunkLimit : step;

            htmlParseChunk(pushCtxt, html + offset, step, 0);
            offset += step;
        }

        /* Empty final chunk with the terminate flag set. */
        htmlParseChunk(pushCtxt, NULL, 0, 1);

        /* The document is freed separately, before the context itself. */
        xmlFreeDoc(pushCtxt->myDoc);
        htmlFreeParserCtxt(pushCtxt);
    }

    /*
     * Stage 3: cleanup.
     */
    xmlFuzzMemSetLimit(0);
    xmlFuzzDataCleanup();
    xmlResetLastError();

    return(0);
}