basic support for DFSan in libFuzzer

Change-Id: I44d5765c36b069d976f476751a437da15a31e316
diff --git a/lib/Fuzzer/FuzzerDFSan.h b/lib/Fuzzer/FuzzerDFSan.h
new file mode 100644
index 0000000..e62fe0b
--- /dev/null
+++ b/lib/Fuzzer/FuzzerDFSan.h
@@ -0,0 +1,59 @@
+//===- FuzzerDFSan.h - Internal header for the Fuzzer -----------*- C++ -* ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// DFSan interface.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_DFSAN_H
+#define LLVM_FUZZER_DFSAN_H
+
+// LLVM_FUZZER_SUPPORTS_DFSAN is 1 only when the compiler can find
+// <sanitizer/dfsan_interface.h> and the target is Linux; otherwise it is 0.
+#define LLVM_FUZZER_SUPPORTS_DFSAN 0
+#if defined(__has_include)
+# if __has_include(<sanitizer/dfsan_interface.h>)
+#  if defined (__linux__)
+#   undef LLVM_FUZZER_SUPPORTS_DFSAN
+#   define LLVM_FUZZER_SUPPORTS_DFSAN 1
+#   include <sanitizer/dfsan_interface.h>
+#  endif  // __linux__
+# endif
+#endif  // defined(__has_include)
+
+#if LLVM_FUZZER_SUPPORTS_DFSAN
+
+// Weak re-declarations of the DFSan entry points used by the fuzzer, so that
+// linking does not require the DFSan runtime to be present.
+extern "C" {
+__attribute__((weak))
+dfsan_label dfsan_create_label(const char *desc, void *userdata);
+__attribute__((weak))
+void dfsan_set_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+void dfsan_add_label(dfsan_label label, void *addr, size_t size);
+__attribute__((weak))
+const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label);
+__attribute__((weak))
+dfsan_label dfsan_read_label(const void *addr, size_t size);
+}  // extern "C"
+
+#else  // !LLVM_FUZZER_SUPPORTS_DFSAN
+
+// The DFSan header was not included, so dfsan_label and dfsan_label_info are
+// undeclared. Provide stand-ins so that code naming these types still
+// compiles; no dfsan_* functions are declared in this configuration.
+typedef unsigned short dfsan_label;
+struct dfsan_label_info {
+  dfsan_label l1, l2;
+  const char *desc;
+  void *userdata;
+};
+
+#endif  // LLVM_FUZZER_SUPPORTS_DFSAN
+
+#endif // LLVM_FUZZER_DFSAN_H
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index c266156..2d69cf5 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -39,6 +39,7 @@
   void MinimizeCrashLoop(const Unit &U);
   void ShuffleAndMinimize(UnitVector *V);
   void InitializeTraceState();
+  void AssignTaintLabels(uint8_t *Data, size_t Size);  // DFSan byte labels.
   void RereadOutputCorpus(size_t MaxSize);
 
   size_t secondsSinceProcessStartUp() {
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 7040928..99cd3cd 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -443,6 +443,7 @@
   memcpy(DataCopy, Data, Size);
   if (CurrentUnitData && CurrentUnitData != Data)
     memcpy(CurrentUnitData, Data, Size);
+  AssignTaintLabels(DataCopy, Size);
   CurrentUnitSize = Size;
   AllocTracer.Start(Options.TraceMalloc);
   UnitStartTime = system_clock::now();
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index a486223..2570ee8 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -9,6 +9,7 @@
 // Data tracing.
 //===----------------------------------------------------------------------===//
 
+#include "FuzzerDFSan.h"
 #include "FuzzerDictionary.h"
 #include "FuzzerInternal.h"
 #include "FuzzerIO.h"
@@ -27,8 +28,38 @@
 
 int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
 
+// Half-open interval [Beg, End) of input-byte offsets; Beg == End means empty.
+struct LabelRange {
+  uint16_t Beg, End;
+  LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {}
+  // Smallest range covering both A and B; an empty operand is ignored.
+  static LabelRange Join(LabelRange A, LabelRange B) {
+    if (A.Beg == A.End) return B;
+    if (B.Beg == B.End) return A;
+    return LabelRange(std::min(A.Beg, B.Beg), std::max(A.End, B.End));
+  }
+  LabelRange &Join(LabelRange LR) { return *this = Join(*this, LR); }
+  // Range [Idx - 1, Idx), where Idx is the integer stored in LI->userdata.
+  static LabelRange Singleton(const dfsan_label_info *LI) {
+    const auto Raw = reinterpret_cast<uintptr_t>(LI->userdata);
+    const uint16_t Idx = static_cast<uint16_t>(Raw);
+    assert(Idx > 0);
+    return LabelRange(static_cast<uint16_t>(Idx - 1), Idx);
+  }
+};
+
+struct TraceBasedMutation {
+  uint32_t Pos;  // Input offset recorded together with W.
+  Word W;  // Bytes captured by TraceState::AddMutation.
+};
+
+
 class TraceState {
 public:
+  LabelRange GetLabelRange(dfsan_label L);  // Memoized in LabelRanges.
+  void DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+                           const uint8_t *Data2, dfsan_label L1,
+                           dfsan_label L2);  // Records candidate mutations.
   TraceState(MutationDispatcher &MD, const FuzzingOptions &Options,
              const Fuzzer *F)
       : MD(MD), Options(Options), F(F) {}
@@ -37,6 +68,7 @@
     if (!Options.UseMemmem)
       return;
     RecordingMemmem = true;
+    NumMutations = 0;
     InterestingWords.clear();
     MD.ClearAutoDictionary();
   }
@@ -44,6 +76,10 @@
   void StopTraceRecording() {
     if (!RecordingMemmem)
       return;
+    // Hand every recorded (word, offset) pair to the auto dictionary.
+    for (TraceBasedMutation *M = Mutations, *E = M + NumMutations; M != E;
+         ++M)
+      MD.AddWordToAutoDictionary({M->W, M->Pos});
     for (auto &W : InterestingWords)
       MD.AddWordToAutoDictionary({W});
   }
@@ -56,17 +92,81 @@
     InterestingWords.insert(W);
   }
 
+  void EnsureDfsanLabels(size_t Size) {
+    // Label K stores K as userdata. Assumes nobody created labels before us.
+    for (size_t Want = LastDfsanLabel + 1; Want <= Size; ++Want) {
+      if (dfsan_create_label("input", reinterpret_cast<void *>(Want)) != Want) {
+        Printf("DFSan labels are not starting from 1, exiting\n");
+        exit(1);
+      }
+      LastDfsanLabel = Want;
+    }
+  }
+
+  // Records one candidate mutation: the first Size bytes of Data paired with
+  // input offset Pos. Once kMaxMutations entries are stored, further calls are
+  // ignored. The entries are handed to the auto dictionary by
+  // StopTraceRecording().
+  void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
+    if (NumMutations >= kMaxMutations) return;
+    TraceBasedMutation &M = Mutations[NumMutations++];
+    M.Pos = Pos;
+    M.W.Set(Data, Size);
+  }
+
+  // Integer overload: records the first Size bytes of Data as stored in memory.
+  void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
+    assert(Size <= sizeof(Data));
+    AddMutation(Pos, Size, reinterpret_cast<const uint8_t *>(&Data));
+  }
+
  private:
 
+  static const size_t kMaxMutations = 1 << 16;  // Capacity of Mutations.
+  size_t NumMutations = 0;  // Used prefix of Mutations; was uninitialized.
+  TraceBasedMutation Mutations[kMaxMutations];
   // TODO: std::set is too inefficient, need to have a custom DS here.
   std::set<Word> InterestingWords;
   MutationDispatcher &MD;
+  LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];
+  size_t LastDfsanLabel = 0;
   const FuzzingOptions Options;
   const Fuzzer *F;
 };
 
 static TraceState *TS;
 
+// Memoized map from a label to the range of input offsets it covers.
+LabelRange TraceState::GetLabelRange(dfsan_label L) {
+  LabelRange &Cached = LabelRanges[L];
+  if (L == 0 || Cached.Beg < Cached.End) return Cached;  // Unlabeled or hit.
+  const dfsan_label_info *Info = dfsan_get_label_info(L);
+  if (!Info->l1 && !Info->l2) return Cached = LabelRange::Singleton(Info);
+  return Cached = LabelRange::Join(GetLabelRange(Info->l1),
+                                   GetLabelRange(Info->l2));
+}
+
+// Called when a comparison of CmpSize bytes between Data1 (label L1) and Data2
+// (label L2) is observed. The operand whose label is zero is treated as the
+// constant side; its bytes are recorded as a candidate mutation at every
+// offset of the labeled operand's input range where CmpSize bytes fit. If both
+// labels are non-zero, L1's range and Data2's bytes are used. If both are
+// zero, the range is empty and nothing is recorded (for CmpSize > 0).
+void TraceState::DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+                                     const uint8_t *Data2, dfsan_label L1,
+                                     dfsan_label L2) {
+  if (!F->InFuzzingThread()) return;
+  const uint8_t *Data = L1 ? Data2 : Data1;
+  LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
+  for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
+    AddMutation(Pos, CmpSize, Data);
+    // Pos and CmpSize are size_t, so they are printed with %zu, not %d.
+    if (Options.Verbosity >= 3)
+      Printf("DFSanMemcmpCallback: Pos %zu Size %zu\n", Pos, CmpSize);
+  }
+}
+
+
 void Fuzzer::StartTraceRecording() {
   if (!TS) return;
   TS->StartTraceRecording();
@@ -88,6 +188,16 @@
   return Len;
 }
 
+// Taints input byte Data[i] with DFSan label i + 1 so that LabelRange can map a
+// label back to the input offset it came from. Does nothing when TS is null
+// or when the weak DFSan functions are not linked in.
+void Fuzzer::AssignTaintLabels(uint8_t *Data, size_t Size) {
+  if (!TS || !dfsan_create_label || !dfsan_set_label) return;
+  TS->EnsureDfsanLabels(Size);
+  for (size_t i = 0; i < Size; i++)
+    dfsan_set_label(static_cast<dfsan_label>(i + 1), &Data[i], 1);
+}
+
 // Finds min of (strlen(S1), strlen(S2)).
 // Needed bacause one of these strings may actually be non-zero terminated.
 static size_t InternalStrnlen2(const char *S1, const char *S2) {
@@ -101,6 +211,43 @@
 using fuzzer::TS;
 
 extern "C" {
+// Hooks for 1/2/4/8-byte integer comparisons; the bodies are currently empty.
+#define DFSAN_CMP_CALLBACK(N)                                                  \
+  void __dfsw___sanitizer_cov_trace_cmp##N(uint64_t Arg1, uint64_t Arg2,       \
+                                           dfsan_label L1, dfsan_label L2) {   \
+  }
+DFSAN_CMP_CALLBACK(1)
+DFSAN_CMP_CALLBACK(2)
+DFSAN_CMP_CALLBACK(4)
+DFSAN_CMP_CALLBACK(8)
+#undef DFSAN_CMP_CALLBACK
+
+void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
+                                         dfsan_label L1, dfsan_label L2) {
+}  // Empty for now: switch values are not recorded.
+
+void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
+                            size_t n, dfsan_label s1_label,
+                            dfsan_label s2_label, dfsan_label n_label) {
+}  // Empty for now: memcmp calls are not recorded.
+
+void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
+                             size_t n, dfsan_label s1_label,
+                             dfsan_label s2_label, dfsan_label n_label) {
+}  // Empty for now: strncmp calls are not recorded.
+
+// strcmp hook: reports the min(strlen)-byte comparison of s1/s2 to TraceState.
+void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
+                            dfsan_label s1_label, dfsan_label s2_label) {
+  if (!TS || !dfsan_read_label) return;  // Tracing or DFSan is unavailable.
+  const size_t Len1 = strlen(s1), Len2 = strlen(s2);
+  const size_t N = std::min(Len1, Len2);
+  if (N <= 1) return;  // Not interesting.
+  const dfsan_label L1 = dfsan_read_label(s1, Len1);
+  const dfsan_label L2 = dfsan_read_label(s2, Len2);
+  TS->DFSanMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+                          reinterpret_cast<const uint8_t *>(s2), L1, L2);
+}
 
 // We may need to avoid defining weak hooks to stay compatible with older clang.
 #ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
diff --git a/lib/Fuzzer/build.sh b/lib/Fuzzer/build.sh
index 4556af5..4461526 100755
--- a/lib/Fuzzer/build.sh
+++ b/lib/Fuzzer/build.sh
@@ -2,7 +2,7 @@
 LIBFUZZER_SRC_DIR=$(dirname $0)
 CXX="${CXX:-clang}"
 for f in $LIBFUZZER_SRC_DIR/*.cpp; do
-  $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c &
+  $CXX -g -O2 -fno-omit-frame-pointer -fPIC -std=c++11 $f -c &
 done
 wait
 rm -f libFuzzer.a