basic support for DFSan in libFuzzer
Change-Id: I44d5765c36b069d976f476751a437da15a31e316
diff --git a/lib/Fuzzer/FuzzerDFSan.h b/lib/Fuzzer/FuzzerDFSan.h
new file mode 100644
index 0000000..e62fe0b
--- /dev/null
+++ b/lib/Fuzzer/FuzzerDFSan.h
@@ -0,0 +1,58 @@
+//===- FuzzerDFSan.h - Internal header for the Fuzzer -----------*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// DFSan interface.
+//
+// Sets LLVM_FUZZER_SUPPORTS_DFSAN to 1 when <sanitizer/dfsan_interface.h> is
+// available on Linux (0 otherwise) and declares the DFSan entry points used by
+// the fuzzer as weak symbols.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_DFSAN_H
+#define LLVM_FUZZER_DFSAN_H
+
+#include <stddef.h>  // size_t, needed even when the DFSan header is absent.
+
+#define LLVM_FUZZER_SUPPORTS_DFSAN 0
+#if defined(__has_include)
+# if __has_include(<sanitizer/dfsan_interface.h>)
+#  if defined (__linux__)
+#   undef LLVM_FUZZER_SUPPORTS_DFSAN
+#   define LLVM_FUZZER_SUPPORTS_DFSAN 1
+#   include <sanitizer/dfsan_interface.h>
+#  endif // __linux__
+# endif
+#endif // defined(__has_include)
+
+#if !LLVM_FUZZER_SUPPORTS_DFSAN
+// <sanitizer/dfsan_interface.h> was not included, so the types named by the
+// declarations below would be undeclared. Provide stand-ins so this header
+// still compiles. NOTE(review): these are meant to mirror the DFSan interface
+// header -- confirm against the DFSan version in use.
+typedef unsigned short dfsan_label;
+struct dfsan_label_info {
+  dfsan_label l1, l2;
+  const char *desc;
+  void *userdata;
+};
+#endif // !LLVM_FUZZER_SUPPORTS_DFSAN
+
+extern "C" {
+__attribute__((weak))
+dfsan_label dfsan_create_label(const char *desc, void *userdata);
+__attribute__((weak))
+void dfsan_set_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+void dfsan_add_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
+__attribute__((weak))
+dfsan_label dfsan_read_label(const void *addr, size_t size);
+} // extern "C"
+
+#endif // LLVM_FUZZER_DFSAN_H
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c266156..2d69cf5 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -39,6 +39,7 @@
void MinimizeCrashLoop(const Unit &U);
void ShuffleAndMinimize(UnitVector *V);
void InitializeTraceState();
+ void AssignTaintLabels(uint8_t *Data, size_t Size);  // Gives byte i of Data the DFSan label i + 1 (defined in FuzzerTraceState.cpp).
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 7040928..99cd3cd 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -443,6 +443,7 @@
memcpy(DataCopy, Data, Size);
if (CurrentUnitData && CurrentUnitData != Data)
memcpy(CurrentUnitData, Data, Size);
+ AssignTaintLabels(DataCopy, Size);
CurrentUnitSize = Size;
AllocTracer.Start(Options.TraceMalloc);
UnitStartTime = system_clock::now();
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index a486223..2570ee8 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -9,6 +9,7 @@
// Data tracing.
//===----------------------------------------------------------------------===//
+#include "FuzzerDFSan.h"
#include "FuzzerDictionary.h"
#include "FuzzerInternal.h"
#include "FuzzerIO.h"
@@ -27,8 +28,38 @@
int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
+// A half-open interval [Beg, End) of input byte offsets; Beg == End is empty.
+struct LabelRange {
+  uint16_t Beg, End;
+
+  LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
+
+  // Smallest range covering both arguments; an empty argument is ignored.
+  static LabelRange Join(LabelRange LR1, LabelRange LR2) {
+    const bool Empty1 = LR1.Beg == LR1.End;
+    const bool Empty2 = LR2.Beg == LR2.End;
+    if (Empty1 || Empty2)
+      return Empty1 ? LR2 : LR1;
+    return LabelRange(std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End));
+  }
+  // Widens this range in place so that it also covers LR.
+  LabelRange &Join(LabelRange LR) {
+    *this = Join(*this, LR);
+    return *this;
+  }
+  // One-element range for a label whose userdata holds (offset + 1).
+  static LabelRange Singleton(const dfsan_label_info *LI) {
+    const uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata;
+    assert(Idx > 0);
+    return LabelRange((uint16_t)(Idx - 1), Idx);
+  }
+};
+
+struct TraceBasedMutation {  // One mutation candidate recorded by TraceState::AddMutation.
+ uint32_t Pos;  // Position passed to AddMutation; later handed to the auto dictionary together with W.
+ Word W;  // Word filled from the recorded data via W.Set(Data, Size).
+};
+
+
class TraceState {
public:
+ LabelRange GetLabelRange(dfsan_label L);  // Range covered by label L, cached in LabelRanges; defined after the class.
+ void DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2, dfsan_label L1,
+ dfsan_label L2);  // Records mutations for a labelled comparison; defined after the class.
TraceState(MutationDispatcher &MD, const FuzzingOptions &Options,
const Fuzzer *F)
: MD(MD), Options(Options), F(F) {}
@@ -37,6 +68,7 @@
if (!Options.UseMemmem)
return;
RecordingMemmem = true;
+ NumMutations = 0;
InterestingWords.clear();
MD.ClearAutoDictionary();
}
@@ -44,6 +76,10 @@
void StopTraceRecording() {
if (!RecordingMemmem)
return;
+ for (size_t i = 0; i < NumMutations; i++) {  // Hand every recorded mutation (word and position) to the auto dictionary.
+ auto &M = Mutations[i];
+ MD.AddWordToAutoDictionary({M.W, M.Pos});
+ }
for (auto &W : InterestingWords)
MD.AddWordToAutoDictionary({W});
}
@@ -56,17 +92,81 @@
InterestingWords.insert(W);
}
+  // Creates DFSan labels until labels 1..Size exist; label N carries N as its
+  // userdata. Relies on nobody else having called dfsan_create_label before.
+  void EnsureDfsanLabels(size_t Size) {
+    while (LastDfsanLabel < Size) {
+      const size_t Wanted = LastDfsanLabel + 1;
+      if (dfsan_create_label("input", (void *)Wanted) != Wanted) {
+        Printf("DFSan labels are not starting from 1, exiting\n");
+        exit(1);
+      }
+      LastDfsanLabel = Wanted;
+    }
+  }
+
+  // Records a mutation candidate: the Size bytes at Data, paired with input
+  // position Pos. Dropped silently once kMaxMutations entries are stored.
+  // NOTE(review): Size reaches Word::Set unclamped -- confirm callers' bounds.
+  void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
+    if (NumMutations >= kMaxMutations) return;
+    auto &M = Mutations[NumMutations++];
+    M.Pos = Pos;
+    M.W.Set(Data, Size);
+  }
+
+  // Integer flavour: records the first Size bytes of Data's in-memory
+  // representation at position Pos.
+  void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
+    assert(Size <= sizeof(Data));
+    AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
+  }
+
private:
+  static const size_t kMaxMutations = 1 << 16;  // Capacity of Mutations.
+  size_t NumMutations = 0;  // Entries in use; zero-initialized because AddMutation may run before StartTraceRecording resets it.
+  TraceBasedMutation Mutations[kMaxMutations];  // Filled by AddMutation, drained in StopTraceRecording.
// TODO: std::set is too inefficient, need to have a custom DS here.
std::set<Word> InterestingWords;
MutationDispatcher &MD;
+ LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];  // One slot per possible dfsan_label value; written by GetLabelRange.
+ size_t LastDfsanLabel = 0;  // Number of labels created so far by EnsureDfsanLabels.
const FuzzingOptions Options;
const Fuzzer *F;
};
static TraceState *TS;
+// Returns the range covered by label L, caching the result in LabelRanges.
+// Label 0 and already-computed non-empty entries are returned as stored.
+LabelRange TraceState::GetLabelRange(dfsan_label L) {
+  LabelRange &Cached = LabelRanges[L];
+  const bool AlreadyKnown = Cached.Beg < Cached.End;
+  if (L == 0 || AlreadyKnown)
+    return Cached;
+  const dfsan_label_info *Info = dfsan_get_label_info(L);
+  if (!Info->l1 && !Info->l2) {
+    // No constituent labels: the range comes from the label's own userdata.
+    Cached = LabelRange::Singleton(Info);
+    return Cached;
+  }
+  // Combined label: join the ranges of both constituents.
+  Cached = LabelRange::Join(GetLabelRange(Info->l1), GetLabelRange(Info->l2));
+  return Cached;
+}
+
+// Called from dfsan_weak_hook_strcmp with the two compared buffers and their
+// labels. For every position in the labelled operand's range where CmpSize
+// bytes still fit, records the other operand's bytes as a mutation candidate.
+// NOTE(review): CmpSize is passed on unclamped -- confirm Word's size limit.
+void TraceState::DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+                                     const uint8_t *Data2, dfsan_label L1,
+                                     dfsan_label L2) {
+  if (!F->InFuzzingThread()) return;
+  if (L1 == 0 && L2 == 0)
+    return;  // Neither operand carries a label: not actionable.
+  // When both operands are labelled, Data2 is recorded against L1's range.
+  const uint8_t *Data = L1 ? Data2 : Data1;
+  LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
+  for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
+    AddMutation(Pos, CmpSize, Data);
+    if (Options.Verbosity >= 3)
+      Printf("DFSanMemcmpCallback: Pos %zu Size %zu\n", Pos, CmpSize);
+  }
+}
+
+
void Fuzzer::StartTraceRecording() {
if (!TS) return;
TS->StartTraceRecording();
@@ -88,6 +188,16 @@
return Len;
}
+// Gives byte i of Data the DFSan label i + 1. Does nothing when there is no
+// trace state or when the weakly declared DFSan functions are not linked in.
+void Fuzzer::AssignTaintLabels(uint8_t *Data, size_t Size) {
+  if (!TS) return;
+  if (&dfsan_create_label == nullptr || &dfsan_set_label == nullptr) return;
+  TS->EnsureDfsanLabels(Size);
+  for (size_t i = 0; i < Size; i++)
+    dfsan_set_label(i + 1, &Data[i], 1);
+}
+
// Finds min of (strlen(S1), strlen(S2)).
// Needed bacause one of these strings may actually be non-zero terminated.
static size_t InternalStrnlen2(const char *S1, const char *S2) {
@@ -101,6 +211,43 @@
using fuzzer::TS;
extern "C" {
+#define DFSAN_CMP_CALLBACK(N) \
+ void __dfsw___sanitizer_cov_trace_cmp##N(uint64_t Arg1, uint64_t Arg2, \
+ dfsan_label L1, dfsan_label L2) { \
+ }
+
+DFSAN_CMP_CALLBACK(1)  // Each expansion defines __dfsw___sanitizer_cov_trace_cmp<N> with an empty body.
+DFSAN_CMP_CALLBACK(2)
+DFSAN_CMP_CALLBACK(4)
+DFSAN_CMP_CALLBACK(8)
+#undef DFSAN_CMP_CALLBACK  // The helper macro is only needed for the four expansions above.
+
+void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
+ dfsan_label L1, dfsan_label L2) {  // Empty body: the hook is defined but records nothing.
+}
+
+void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
+ size_t n, dfsan_label s1_label,
+ dfsan_label s2_label, dfsan_label n_label) {  // Empty body: the hook is defined but records nothing.
+}
+
+void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
+ size_t n, dfsan_label s1_label,
+ dfsan_label s2_label, dfsan_label n_label) {  // Empty body: the hook is defined but records nothing.
+}
+
+// strcmp hook: reads the DFSan labels of both strings and forwards them, with
+// the shorter string's length as the compare size, to the trace state.
+void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
+                            dfsan_label s1_label, dfsan_label s2_label) {
+  if (!TS) return;  // No trace state to record into.
+  size_t Len1 = strlen(s1);
+  size_t Len2 = strlen(s2);
+  size_t N = std::min(Len1, Len2);
+  if (N <= 1) return;  // Not interesting.
+  dfsan_label L1 = dfsan_read_label(s1, Len1);
+  dfsan_label L2 = dfsan_read_label(s2, Len2);
+  TS->DFSanMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+                          reinterpret_cast<const uint8_t *>(s2), L1, L2);
+}
// We may need to avoid defining weak hooks to stay compatible with older clang.
#ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
diff --git a/lib/Fuzzer/build.sh b/lib/Fuzzer/build.sh
index 4556af5..4461526 100755
--- a/lib/Fuzzer/build.sh
+++ b/lib/Fuzzer/build.sh
@@ -2,7 +2,7 @@
LIBFUZZER_SRC_DIR=$(dirname $0)
CXX="${CXX:-clang}"
for f in $LIBFUZZER_SRC_DIR/*.cpp; do
- $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c &
+ $CXX -g -O2 -fno-omit-frame-pointer -fPIC -std=c++11 $f -c &
done
wait
rm -f libFuzzer.a