Forgot to add file for the previous change...

Add InstallFailureSignalHandler().  The function installs a signal handler that will dump useful information when the program crashes on certain signals such as SIGSEGV.


git-svn-id: https://google-glog.googlecode.com/svn/trunk@12 eb4d4688-79bd-11dd-afb4-1d65580434c0
diff --git a/m4/ac_have_sync_val_compare_and_swap.m4 b/m4/ac_have_sync_val_compare_and_swap.m4
new file mode 100644
index 0000000..9ff59c2
--- /dev/null
+++ b/m4/ac_have_sync_val_compare_and_swap.m4
@@ -0,0 +1,14 @@
+dnl Sets HAVE___SYNC_VAL_COMPARE_AND_SWAP if the builtin compiles and links.
+AC_DEFUN([AX_C___SYNC_VAL_COMPARE_AND_SWAP], [
+  AC_MSG_CHECKING(for __sync_val_compare_and_swap)
+  AC_CACHE_VAL(ac_cv___sync_val_compare_and_swap, [
+    AC_TRY_LINK(
+      [],
+      [int a = 0; if (__sync_val_compare_and_swap(&a, 0, 1)) return 1;],
+      ac_cv___sync_val_compare_and_swap=yes,
+      ac_cv___sync_val_compare_and_swap=no)])
+  if test "$ac_cv___sync_val_compare_and_swap" = "yes"; then
+    AC_DEFINE(HAVE___SYNC_VAL_COMPARE_AND_SWAP, 1, [define if your compiler has __sync_val_compare_and_swap])
+  fi
+  AC_MSG_RESULT($ac_cv___sync_val_compare_and_swap)
+])
diff --git a/src/signalhandler.cc b/src/signalhandler.cc
new file mode 100644
index 0000000..c49abc6
--- /dev/null
+++ b/src/signalhandler.cc
@@ -0,0 +1,327 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: Satoru Takabayashi
+//
+// Implementation of InstallFailureSignalHandler().
+
+#include "utilities.h"
+#include "stacktrace.h"
+#include "symbolize.h"
+#include "glog/logging.h"
+
+#include <signal.h>
+#include <time.h>
+#ifdef HAVE_UCONTEXT_H
+# include <ucontext.h>
+#endif
+#include <algorithm>
+
+_START_GOOGLE_NAMESPACE_
+
+// Naive element count of an array (a pointer is not rejected); enough here.
+#define NAIVE_ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+namespace {
+
+// Compare-and-swap on a pointer: if *ptr equals "oldval", "newval" is
+// stored there; the value *ptr held beforehand is returned either way.
+// Uses the GCC builtin if configure found it, else "lock cmpxchg" on GCC
+// for x86 and x86_64, else a naive version, which has a race condition.
+template<typename T>
+inline T* sync_val_compare_and_swap(T** ptr, T* oldval, T* newval) {
+#if defined(HAVE___SYNC_VAL_COMPARE_AND_SWAP)
+  return __sync_val_compare_and_swap(ptr, oldval, newval);
+#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+  T* ret;
+  __asm__ __volatile__("lock; cmpxchg %1, (%2);"
+                       :"=a"(ret)
+                       :"r"(newval), "r"(ptr), "a"(oldval)
+                       :"memory", "cc");
+  return ret;
+#else
+  T* ret = *ptr;
+  if (ret == oldval) {
+    *ptr = newval;
+  }
+  return ret;
+#endif
+}
+
+// Signals for which the failure signal handler is installed, paired with
+// printable names.  We could use strsignal() to get signal names, but we
+// don't, to avoid introducing yet another #ifdef complication.
+//
+// The list should be synced with the comment in signalhandler.h.
+const struct {
+  int number;
+  const char *name;
+} kFailureSignals[] = {
+  { SIGSEGV, "SIGSEGV" },
+  { SIGILL, "SIGILL" },
+  { SIGFPE, "SIGFPE" },
+  { SIGABRT, "SIGABRT" },
+  { SIGBUS, "SIGBUS" },
+  { SIGTERM, "SIGTERM" },
+};
+
+// Extracts the program counter from the signal context; NULL if unknown.
+void* GetPC(void* ucontext_in_void) {
+#if defined(HAVE_UCONTEXT_H) && defined(PC_FROM_UCONTEXT)
+  ucontext_t* const uc = reinterpret_cast<ucontext_t*>(ucontext_in_void);
+  if (uc != NULL) {
+    return (void*)uc->PC_FROM_UCONTEXT;
+  }
+#endif
+  return NULL;
+}
+
+// The class is used for formatting error messages.  We don't use printf()
+// as it's not async signal safe.
+class MinimalFormatter {
+ public:
+  MinimalFormatter(char *buffer, int size)
+      : buffer_(buffer),
+        cursor_(buffer),
+        end_(buffer + size) {
+  }
+
+  // Returns the number of bytes written in the buffer.
+  int num_bytes_written() const { return cursor_ - buffer_; }
+
+  // Appends string from "str" and updates the internal cursor.
+  void AppendString(const char* str) {
+    int i = 0;
+    while (str[i] != '\0' && cursor_ + i < end_) {
+      cursor_[i] = str[i];
+      ++i;
+    }
+    cursor_ += i;
+  }
+
+  // Formats "number" in "radix" and updates the internal cursor.
+  // Lowercase letters are used for 'a' - 'z'.
+  void AppendUint64(uint64 number, int radix) {
+    int i = 0;
+    while (cursor_ + i < end_) {
+      const int tmp = number % radix;
+      number /= radix;
+      cursor_[i] = (tmp < 10 ? '0' + tmp : 'a' + tmp - 10);
+      ++i;
+      if (number == 0) {
+        break;
+      }
+    }
+    // Reverse the bytes written.
+    std::reverse(cursor_, cursor_ + i);
+    cursor_ += i;
+  }
+
+  // Formats "number" as hexadecimal number, and updates the internal
+  // cursor.  Padding will be added in front if needed.
+  void AppendHexWithPadding(uint64 number, int width) {
+    char* start = cursor_;
+    AppendString("0x");
+    AppendUint64(number, 16);
+    // Move to right and add padding in front if needed.
+    if (cursor_ < start + width) {
+      const int64 delta = start + width - cursor_;
+      std::copy(start, cursor_, start + delta);
+      std::fill(start, start + delta, ' ');
+      cursor_ = start + width;
+    }
+  }
+
+ private:
+  char *buffer_;
+  char *cursor_;
+  const char * const end_;
+};
+
+// Writes "size" bytes of "data" to stderr.  write()'s result is not checked.
+void WriteToStderr(const char* data, int size) {
+  write(STDERR_FILENO, data, size);
+}
+
+// Sink used for all failure output; replaceable via InstallFailureWriter().
+void (*g_failure_writer)(const char* data, int size) = WriteToStderr;
+
+// Emits the crash time as raw seconds since the epoch.  A human-readable
+// form is avoided as localtime() is not guaranteed to be async signal safe.
+void DumpTimeInfo() {
+  char line[256];  // Ample room for the time message.
+  MinimalFormatter out(line, sizeof(line));
+  const time_t now = time(NULL);
+  out.AppendString("*** Aborted at ");
+  out.AppendUint64(now, 10);
+  out.AppendString(" (unix time)");
+  out.AppendString(" try \"date -d @");
+  out.AppendUint64(now, 10);
+  out.AppendString("\" if you are using GNU date ***\n");
+  g_failure_writer(line, out.num_bytes_written());
+}
+
+// Dumps information about the signal via the failure writer.
+void DumpSignalInfo(int signal_number, siginfo_t *siginfo) {
+  // Get the signal name.
+  const char* signal_name = NULL;
+  for (size_t i = 0; i < NAIVE_ARRAYSIZE(kFailureSignals); ++i) {
+    if (signal_number == kFailureSignals[i].number) {
+      signal_name = kFailureSignals[i].name;
+    }
+  }
+
+  char buf[256];  // Big enough for signal info.
+  MinimalFormatter formatter(buf, sizeof(buf));
+
+  formatter.AppendString("*** ");
+  if (signal_name) {
+    formatter.AppendString(signal_name);
+  } else {
+    // Use the signal number if the name is unknown.  The signal name
+    // should be known, but just in case.
+    formatter.AppendString("Signal ");
+    formatter.AppendUint64(signal_number, 10);
+  }
+  formatter.AppendString(" (@0x");
+  formatter.AppendUint64(reinterpret_cast<uintptr_t>(siginfo->si_addr), 16);
+  formatter.AppendString(")");
+  formatter.AppendString(" received by PID ");
+  formatter.AppendUint64(getpid(), 10);
+  formatter.AppendString(" (TID 0x");
+  // We assume pthread_t is an integral number or a pointer, rather
+  // than a complex struct.  A C-style cast handles both.  The target is
+  // uintptr_t because casting to int would truncate a 64-bit thread id
+  // (and sign-extend it when widened to uint64), so the printed TID
+  // would not match the value of pthread_self().
+  formatter.AppendUint64((uintptr_t)pthread_self(), 16);
+  formatter.AppendString(") ");
+  // Only linux has the PID of the signal sender in si_pid.
+#ifdef OS_LINUX
+  formatter.AppendString("from PID ");
+  formatter.AppendUint64(siginfo->si_pid, 10);
+  formatter.AppendString("; ");
+#endif
+  formatter.AppendString("stack trace: ***\n");
+  g_failure_writer(buf, formatter.num_bytes_written());
+}
+
+// Writes one line describing the frame at "pc": prefix, address, symbol.
+void DumpStackFrameInfo(const char* prefix, void* pc) {
+  // Resolve the symbol name.  The address just before pc is looked up
+  // because pc may already be in the next function.
+  char name_buf[1024];  // Big enough for a sane symbol.
+  char* const lookup_addr = reinterpret_cast<char *>(pc) - 1;
+  const bool resolved = Symbolize(lookup_addr, name_buf, sizeof(name_buf));
+  const char* const name = resolved ? name_buf : "(unknown)";
+
+  // "0x" plus two hex digits per byte of a pointer.
+  const int field_width = 2 * sizeof(void*) + 2;
+  const uintptr_t address = reinterpret_cast<uintptr_t>(pc);
+
+  char line[1024];  // Big enough for stack frame info.
+  MinimalFormatter out(line, sizeof(line));
+  out.AppendString(prefix);
+  out.AppendString("@ ");
+  out.AppendHexWithPadding(address, field_width);
+  out.AppendString(" ");
+  out.AppendString(name);
+  out.AppendString("\n");
+
+  g_failure_writer(line, out.num_bytes_written());
+}
+
+// Restores the default action for the signal, then sends it to ourselves.
+void InvokeDefaultSignalHandler(int signal_number) {
+  struct sigaction default_action = {};  // All fields zeroed.
+  default_action.sa_handler = SIG_DFL;
+  sigemptyset(&default_action.sa_mask);
+  sigaction(signal_number, &default_action, NULL);
+  kill(getpid(), signal_number);
+}
+
+// This variable is used for protecting FailureSignalHandler() from
+// dumping stuff while another thread is doing it.  Our policy is to let
+// the first thread dump stuff and let other threads wait.
+// See also comments in FailureSignalHandler().
+static pthread_t* g_entered_thread_id_pointer = NULL;
+
+// Handler for the failure signals: dumps the time, the PC, and (with stack
+// trace support) signal and frame info, then invokes the default handler.
+void FailureSignalHandler(int signal_number,
+                          siginfo_t *signal_info,
+                          void *ucontext) {
+  // First check if we've already entered the function.  We use an atomic
+  // compare and swap operation for platforms that support it.  For other
+  // platforms, we use a naive method that could lead to a subtle race.
+
+  // We assume pthread_self() is async signal safe, though it's not
+  // officially guaranteed.
+  pthread_t my_thread_id = pthread_self();
+  // NOTE: We could simply use pthread_t rather than pthread_t* for this,
+  // if pthread_self() is guaranteed to return non-zero value for thread
+  // ids, but there is no such guarantee.  We need to distinguish if the
+  // old value (value returned from __sync_val_compare_and_swap) is
+  // different from the original value (in this case NULL).
+  pthread_t* old_thread_id_pointer =
+      sync_val_compare_and_swap(&g_entered_thread_id_pointer,
+                                static_cast<pthread_t*>(NULL),
+                                &my_thread_id);
+  if (old_thread_id_pointer != NULL) {
+    // Some thread entered the handler before this call did.
+    if (pthread_equal(my_thread_id, *g_entered_thread_id_pointer)) {
+      // It looks like the current thread is reentering the signal handler.
+      // Something must be going wrong (maybe we are reentering by another
+      // type of signal?).  Kill ourself by the default signal handler.
+      InvokeDefaultSignalHandler(signal_number);
+    }
+    // Another thread is dumping stuff.  Let's wait until that thread
+    // finishes the job and kills the process.
+    while (true) {
+      sleep(1);
+    }
+  }
+  // This is the first time we enter the signal handler.  We are going to
+  // do some interesting stuff from here.
+  // TODO(satorux): We might want to set timeout here using alarm(), but
+  // mixing alarm() and sleep() can be a bad idea.
+
+  // First dump time info.
+  DumpTimeInfo();
+
+  // Get the program counter from ucontext (NULL if it is unavailable).
+  void *pc = GetPC(ucontext);
+  DumpStackFrameInfo("PC: ", pc);
+
+#ifdef HAVE_STACKTRACE
+  // Get the stack traces (at most 32 frames).
+  void *stack[32];
+  // +1 to exclude this function.
+  const int depth = GetStackTrace(stack, NAIVE_ARRAYSIZE(stack), 1);
+  DumpSignalInfo(signal_number, signal_info);
+  // Dump the stack traces, one line per frame.
+  for (int i = 0; i < depth; ++i) {
+    DumpStackFrameInfo("    ", stack[i]);
+  }
+#endif
+  // Kill ourself by the default signal handler.
+  InvokeDefaultSignalHandler(signal_number);
+}
+
+}  // namespace
+
+void InstallFailureSignalHandler() {
+  // Build the sigaction struct: SA_SIGINFO selects the sa_sigaction handler.
+  struct sigaction sig_action = {};  // Zero-clear.
+  sigemptyset(&sig_action.sa_mask);
+  sig_action.sa_flags |= SA_SIGINFO;
+  sig_action.sa_sigaction = &FailureSignalHandler;
+
+  for (int i = 0; i < NAIVE_ARRAYSIZE(kFailureSignals); ++i) {  // Every listed signal.
+    CHECK_ERR(sigaction(kFailureSignals[i].number, &sig_action, NULL));
+  }
+}
+
+void InstallFailureWriter(void (*writer)(const char* data, int size)) {
+  g_failure_writer = writer;  // Plain store; "writer" is not checked for NULL.
+}
+
+_END_GOOGLE_NAMESPACE_
diff --git a/src/signalhandler_unittest.cc b/src/signalhandler_unittest.cc
new file mode 100644
index 0000000..5dc3b68
--- /dev/null
+++ b/src/signalhandler_unittest.cc
@@ -0,0 +1,54 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: satorux@google.com (Satoru Takabayashi)
+//
+// This is a helper binary for testing signalhandler.cc.  The actual test
+// is done in signalhandler_unittest.sh.
+
+#include "utilities.h"
+
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include "glog/logging.h"
+
+using namespace GOOGLE_NAMESPACE;
+
+void* DieInThread(void*) {
+  fprintf(stderr, "0x%lx is dying\n", (unsigned long)pthread_self());
+  volatile int a = 0;  // volatile keeps the division from being folded away.
+  volatile int b = 1 / a;  // Integer division by zero; the test expects SIGFPE.
+  return NULL;  // A void* function must return a value on every path.
+}
+
+void WriteToStdout(const char* data, int size) {
+  write(STDOUT_FILENO, data, size);  // The result of write() is ignored.
+}
+
+int main(int argc, char **argv) {
+#ifdef HAVE_STACKTRACE  // Without stack trace support nothing is printed.
+  InitGoogleLogging(argv[0]);
+  InstallFailureSignalHandler();
+  const std::string command = argc > 1 ? argv[1] : "none";
+  if (command == "segv") {
+    // We assume 0xDEAD is not writable, so the store below should fault.
+    int *a = (int*)0xDEAD;
+    *a = 0;
+  } else if (command == "loop") {
+    fprintf(stderr, "looping\n");
+    while (true);  // Spin; the shell script kills us with SIGTERM.
+  } else if (command == "die_in_thread") {
+    pthread_t thread;
+    pthread_create(&thread, NULL, &DieInThread, NULL);
+    pthread_join(thread, NULL);
+  } else if (command == "dump_to_stdout") {
+    InstallFailureWriter(WriteToStdout);
+    abort();
+  } else {
+    // No command: tell the shell script that the tests can be run.
+    puts("OK");
+  }
+#endif
+  return 0;
+}
diff --git a/src/signalhandler_unittest.sh b/src/signalhandler_unittest.sh
new file mode 100755
index 0000000..b139a5e
--- /dev/null
+++ b/src/signalhandler_unittest.sh
@@ -0,0 +1,90 @@
+#! /bin/sh
+# Copyright 2008 Google, Inc.  All Rights Reserved.
+# Author: Satoru Takabayashi
+#
+# Unit tests for signalhandler.cc.
+
+die () {
+    echo "$1"  # Quoted so a message containing [0-9] is not globbed or split.
+    exit 1
+}
+
+BINDIR=".libs"
+LIBGLOG="$BINDIR/libglog.so"
+
+BINARY="$BINDIR/signalhandler_unittest"
+
+# Remove temporary files left by a previous run.
+rm -f signalhandler.out*
+
+if test -e "$BINARY"; then
+  # We need shared object.
+  export LD_LIBRARY_PATH=$BINDIR
+  export DYLD_LIBRARY_PATH=$BINDIR
+else
+  # For windows
+  BINARY="./signalhandler_unittest.exe"
+  if ! test -e "$BINARY"; then
+    echo "We couldn't find signalhandler_unittest binary."
+    exit 1
+  fi
+fi
+
+if [ "x`$BINARY`" != 'xOK' ]; then
+  echo "PASS (No stacktrace support. We don't run this test.)"
+  exit 0
+fi
+
+# SIGSEGV case: the program kills itself; its stderr must show the dump.
+$BINARY segv 2> signalhandler.out1
+for pattern in SIGSEGV 0xdead main "Aborted at [0-9]"
+do
+  grep --quiet "$pattern" signalhandler.out1 ||
+    die "'$pattern' should appear in the output"
+done
+
+# Test for a case the program is killed by this shell script.
+# $! = the process id of the last command run in the background.
+# $$ = the process id of this shell.
+$BINARY loop 2> signalhandler.out2 &
+# Busy-poll until "looping" is written in the file, which indicates the
+# program is ready for signals.  NOTE(review): no timeout if it never is.
+while true; do
+  if grep --quiet looping signalhandler.out2; then
+    break
+  fi
+done
+kill -TERM $!
+wait $!
+
+from_pid=''
+# Only Linux reports the sender's PID; elsewhere the pattern stays empty.
+if [ x`uname` = "xLinux" ]; then
+  from_pid="from PID $$"
+fi
+for pattern in SIGTERM "by PID $!" "$from_pid" main "Aborted at [0-9]"; do
+  if ! grep --quiet "$pattern" signalhandler.out2; then
+    die "'$pattern' should appear in the output"
+  fi
+done
+
+# Test for a case the program dies in a non-main thread.
+$BINARY die_in_thread 2> signalhandler.out3
+EXPECTED_TID="`sed 's/ .*//' signalhandler.out3`"
+
+for pattern in SIGFPE DieInThread "TID $EXPECTED_TID" "Aborted at [0-9]"; do
+  if ! grep --quiet "$pattern" signalhandler.out3; then
+    die "'$pattern' should appear in the output"
+  fi
+done
+
+# Custom failure writer case: the program redirects the dump to stdout
+# instead of stderr, so only stdout is captured here.
+$BINARY dump_to_stdout 1> signalhandler.out4
+for pattern in SIGABRT main "Aborted at [0-9]"
+do
+  grep --quiet "$pattern" signalhandler.out4 ||
+    die "'$pattern' should appear in the output"
+done
+
+echo PASS