blob: 009b5ab84561668e77d56b089cd9934c6ab4a255 [file] [log] [blame]
/* Copyright (C) 2021 Free Software Foundation, Inc.
Contributed by Oracle.
This file is part of GNU Binutils.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "config.h"
#include <alloca.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <pthread.h>
#include "gp-defs.h"
#include "collector.h"
#include "gp-experiment.h"
#include "memmgr.h"
#include "tsd.h"
/* Get dynamic module interface*/
#include "collector_module.h"
/* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
#include "data_pckts.h"
#if ARCH(SPARC)
/* Layout of a SPARC stack frame as written by the 'save' instruction;
   used to follow the chain of saved frame pointers during unwinding.  */
struct frame
{
  long fr_local[8];		/* saved locals */
  long fr_arg[6];		/* saved arguments [0 - 5] */
  struct frame *fr_savfp;	/* saved frame pointer */
  long fr_savpc;		/* saved program counter */
#if WSIZE(32)
  char *fr_stret;		/* struct return addr */
#endif
  long fr_argd[6];		/* arg dump area */
  long fr_argx[1];		/* array of args past the sixth */
};
#elif ARCH(Intel)
/* Minimal x86 frame link: the saved frame pointer followed by the
   return address, as laid out by push %rbp / mov %rsp,%rbp.  */
struct frame
{
  unsigned long fr_savfp;	/* saved frame (base) pointer */
  unsigned long fr_savpc;	/* return address of the caller */
};
#endif
/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1 1
#define DBG_LT2 2
#define DBG_LT3 3
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
#define VM_NO_ACCESS (-1)
#define VM_NOT_VM_MEMORY (-2)
#define VM_NOT_X_SEGMENT (-3)
#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
/*
* Weed through all the arch dependent stuff to get the right definition
* for 'pc' in the ucontext structure. The system header files are mess
* dealing with all the arch (just look for PC, R_PC, REG_PC).
*
*/
#if ARCH(SPARC)
#define IN_BARRIER(x) \
( barrier_hdl && \
(unsigned long)x >= barrier_hdl && \
(unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;
static unsigned long barrier_hdlx = 0;
#if WSIZE(64)
#define STACK_BIAS 2047
#define IN_TRAP_HANDLER(x) \
( misalign_hdl && \
(unsigned long)x >= misalign_hdl && \
(unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;
static unsigned long misalign_hdlx = 0;
#elif WSIZE(32)
#define STACK_BIAS 0
#endif
#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif
#elif ARCH(Intel)
#include "opcodes/disassemble.h"
/* No-op printf-style callback for the libopcodes disassembler.
   We only need instruction boundaries/classification, never the
   printed text, so all output is discarded.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
/* Styled variant of the no-op printf callback required by newer
   libopcodes interfaces; output is likewise discarded.  */
static int
fprintf_styled_func (void *arg ATTRIBUTE_UNUSED,
		     enum disassembler_style st ATTRIBUTE_UNUSED,
		     const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
/* Get LENGTH bytes from info's buffer, at target address memaddr.
   Transfer them to myaddr.  Returns 0 on success, -1 when any part of
   the requested range lies outside the buffer (or beyond stop_vma,
   when stop_vma is set).  */
static int
read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
		  disassemble_info *info)
{
  unsigned int opb = info->octets_per_byte;
  size_t end_addr_offset = length / opb;
  size_t max_addr_offset = info->buffer_length / opb;
  size_t octets = (memaddr - info->buffer_vma) * opb;
  /* Reject reads starting before the buffer, extending past its end,
     or crossing the optional stop_vma limit.  */
  if (memaddr < info->buffer_vma
      || memaddr - info->buffer_vma > max_addr_offset
      || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
      || (info->stop_vma && (memaddr >= info->stop_vma
			     || memaddr + end_addr_offset > info->stop_vma)))
    return -1;
  memcpy (myaddr, info->buffer + octets, length);
  return 0;
}
/* No-op address-printing callback for the disassembler.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }
/* Symbol-lookup callback: no symbol table is available during
   in-process unwinding, so always report "no symbol".  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}
/* Symbol-validity callback: accept any symbol (none are ever
   produced by symbol_at_address_func anyway).  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}
/* Memory-error callback: errors are already signalled through
   read_memory_func's return value, so nothing to do here.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }
#if WSIZE(32)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])
#elif WSIZE(64)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
#endif /* WSIZE() */
#elif ARCH(Aarch64)
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
#endif /* ARCH() */
/*
* FILL_CONTEXT() for all platforms
* Could use getcontext() except:
* - it's not guaranteed to be async signal safe
* - it's a system call and not that lightweight
* - it's not portable as of POSIX.1-2008
* So we just use low-level mechanisms to fill in the few fields we need.
*/
#if ARCH(SPARC)
#if WSIZE(32)
#define FILL_CONTEXT(context) \
{ \
greg_t fp; \
__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
__asm__ __volatile__( "ta 3" ); \
GET_SP(context) = fp; \
GET_PC(context) = (greg_t)0; \
}
#elif WSIZE(64)
#define FILL_CONTEXT(context) \
{ \
greg_t fp; \
__asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
__asm__ __volatile__( "flushw" ); \
GET_SP(context) = fp; \
GET_PC(context) = (greg_t)0; \
}
#endif /* WSIZE() */
#elif ARCH(Intel)
#define FILL_CONTEXT(context) \
{ \
context->uc_link = NULL; \
void *sp = __collector_getsp(); \
GET_SP(context) = (intptr_t)sp; \
GET_FP(context) = (intptr_t)__collector_getfp(); \
GET_PC(context) = (intptr_t)__collector_getpc(); \
context->uc_stack.ss_sp = sp; \
context->uc_stack.ss_size = 0x100000; \
}
#elif ARCH(Aarch64)
#define FILL_CONTEXT(context) \
{ CALL_UTIL (getcontext) (context); \
context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
}
#endif /* ARCH() */
static int
getByteInstruction (unsigned char *p)
{
if (__collector_VM_ReadByteInstruction)
{
int v = __collector_VM_ReadByteInstruction (p);
if (v != VM_NOT_VM_MEMORY)
return v;
}
return *p;
}
struct DataHandle *dhndl = NULL;
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;
/* To support two OpenMP API's we use a pointer
* to the actual function.
*/
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;
#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) )
#define JAVA_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )
#define ROOT_UID 801425552975190205ULL
#define ROOT_UID_INV 92251691606677ULL
#define ROOT_IDX 13907816567264074199ULL
#define ROOT_IDX_INV 2075111ULL
#define UIDTableSize 1048576
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;
static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;
#if ARCH(Intel)
#define ValTableSize 1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);
/* State carried across one step of the x86 stack walk.  */
struct WalkContext
{
  unsigned long pc;	/* current program counter */
  unsigned long sp;	/* current stack pointer */
  unsigned long fp;	/* current frame pointer */
  unsigned long ln;	/* NOTE(review): meaning not evident in this chunk;
			   set/used by the walker elsewhere — confirm there */
  unsigned long sbase;	/* stack boundary */
  unsigned long tbgn;	/* current memory segment start */
  unsigned long tend;	/* current memory segment end */
};
#endif
#if defined(DEBUG) && ARCH(Intel)
#include <execinfo.h>
/* DEBUG builds only: print the collector's own native backtrace
   (up to MAX_SIZE frames) via glibc backtrace(), when SP_DUMP_STACK
   tracing is enabled.  NLINE tags the output with the caller's
   source line for correlation.  */
static void
dump_stack (int nline)
{
  if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
    return;
  enum Constexpr { MAX_SIZE = 1024 };
  void *array[MAX_SIZE];
  size_t sz = backtrace (array, MAX_SIZE);
  char **strings = backtrace_symbols (array, sz);
  DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
  for (int i = 0; i < sz; i++)
    DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i],
	      strings[i] ? strings[i] : "???");
}
#define dump_targets(nline, ntrg, targets) \
if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
for(int i = 0; i < ntrg; i++) \
DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i])
#else
#define dump_stack(x)
#define dump_targets(nline, ntrg, targets)
#endif
/* Record the end-of-stack address for the current thread in its
   unwind TSD slot.  For pthreads the address is queried from the
   thread's attributes; for cloned threads the caller passes it in
   via STACK.  */
void
__collector_ext_unwind_key_init (int isPthread, void * stack)
{
  void *slot = __collector_tsd_get_by_key (unwind_key);
  if (slot == NULL)
    {
      TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
      return;
    }
  if (!isPthread)
    {
      /* Cloned thread: the caller supplies the end-of-stack address.  */
      *(void**) slot = stack;
      return;
    }
  size_t stack_size = 0;
  void *stack_addr = 0;
  pthread_t self = pthread_self ();
  pthread_attr_t attr;
  int err = pthread_getattr_np (self, &attr);
  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", self, err);
  if (err == 0)
    {
      err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
      if (err == 0)
	stack_addr = (char*) stack_addr + stack_size;	/* end of stack */
      TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
		(long) stack_size, stack_addr, err);
      err = pthread_attr_destroy (&attr);
      TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
    }
  *(void**) slot = stack_addr;
}
/* One-time initialization of the unwinder: allocate the uid hash
   table (and, on x86, the return-address caches and OpenMP walk
   contexts), read stack-depth limits from the environment, locate
   optional VM/trap/barrier symbols, open the frame-info output
   handle when RECORD is nonzero, and create the per-thread TSD key
   used by __collector_ext_unwind_key_init.  On allocation failure
   the experiment is terminated.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);
  /* Clamp the user-requested Java stack depth to [5, MAX_STACKDEPTH].  */
  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }
  /* Same clamping for the native stack depth.  */
  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }
  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;
  /* Optional hook provided by the JVM for reading bytes in VM memory.  */
  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");
#if ARCH(SPARC)
#if WSIZE(64)
  /* Locate the misalign trap handler range (see IN_TRAP_HANDLER).  */
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  /* Locate the parallel-loop barrier range (see IN_BARRIER).  */
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#else
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#endif /* WSIZE() */
#elif ARCH(Intel)
  /* Caches of classified return-address pcs; allocation failure here
     is tolerated (the tables are optional accelerators).  */
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */
  if (record)
    {
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }
  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}
/* Close the frame-info output handle and mark it unavailable.  */
void
__collector_ext_unwind_close ()
{
  struct DataHandle *handle = dhndl;
  __collector_delete_handle (handle);
  dhndl = NULL;
}
/* Return the return address LEVEL frames above the caller of this
   function, or NULL when the unwinder is not initialized or the stack
   is shallower than requested.  */
void*
__collector_ext_return_address (unsigned level)
{
  if (NULL == UIDTable)	/* unwind not initialized yet */
    return NULL;
  /* "+ 4" slots: strip __collector_get_return_address and its caller,
     plus room for the requested level.  */
  unsigned size = (level + 4) * sizeof (long);
  ucontext_t context;
  FILL_CONTEXT ((&context));
  char* buf = (char*) alloca (size);
  if (buf == NULL)
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
      return NULL;
    }
  int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
  /* BUGFIX: cast the bound to int.  Comparing the signed 'sz' against
     the unsigned expression would promote a negative 'sz' to a huge
     unsigned value, passing the check and reading out of bounds.  */
  if (sz < (int) ((level + 3) * sizeof (long)))
    {
      TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
      return NULL;
    }
  long *lbuf = (long*) buf;
  TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
  return (void *) (lbuf[level + 2]);
}
/*
 * Collector interface method getFrameInfo.
 * Build a Frame_packet describing the current call stack (Java and/or
 * native and/or a caller-supplied stack image, depending on MODE and
 * ARG), then hand it to compute_uid for deduplication and recording.
 * Returns the resulting FrameInfo uid, or 0 on invalid arguments.
 * NOTE(review): TS is not used in this function body — presumably kept
 * for interface symmetry; confirm against the callers.
 */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;
  int unwind_mode = 0;
  int do_walk = 1;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;	/* base mode: low 16 bits select the source */
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      /* Unwind from the current point; ARG is a stack hint pointer.  */
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      /* Unwind from a caller-provided ucontext.  */
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      if (GET_SP (context) == 0)
	pseudo_context = 1;	/* synthetic context without a stack */
      }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      /* Record a pre-built stack image supplied by the caller.  */
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;
  /* Size the packet buffer for the worst case.  */
  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);
  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);
  char *d = (char*) (frpckt + 1);	/* write cursor into the payload */
  int size = max_frame_size;		/* bytes remaining in the payload */
#define MIN(a,b) ((a)<(b)?(a):(b))
#if defined(GPROFNG_JAVA_PROFILING)
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }
#endif
  /* get native stack */
  if (context)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;
      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }
  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size; // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }
  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}
/* Compute a 64-bit uid over all STACK_INFO/JAVA_INFO sub-records of FRP
   by folding the stack words through multiplicative hashes (ROOT_UID for
   the uid itself, ROOT_IDX for table indices).  If the uid is already
   present in UIDTable the packet is not re-written; otherwise common
   stack suffixes already in the table are compressed out and the packet
   is emitted to the frame-info handle.  Returns the uid.  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;
  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  /* Pass 1: hash every sub-record, walking its words from the end
     (outermost frame) toward the beginning (leaf frame).  */
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;	/* per-record hash */
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;		/* whole-packet hash */
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;	/* saved for the compression pass */
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");
  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  /* Probe three independent slots derived from different bit ranges
     of the index hash.  */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;	/* already recorded: no packet write */
  frp->uid = uid;
  /* Compress info's */
  /* Pass 2: for each sub-record, scan from the leaf looking for a
     suffix whose rolling hash is already in UIDTable; if found, the
     suffix is replaced by its 8-byte uid and the packet is shrunk.  */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  first = 0;
		  UIDTable[idx1] = uidt;	/* remember the full record uid */
		}
	      /* Peel one leaf word off the rolling hashes (inverse of
		 the (h + val) * ROOT multiplication in pass 1).  */
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  /* Store the matched suffix uid (little-endian bytes)
		     in place of the suffix words.  */
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      /* Slide the tail of the packet down over the removed bytes.  */
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
/* Compute (and record if new) the uid for a caller-supplied array of
   stack words ARG, chained onto the parent uid SUID.  Uses the same
   rolling hashes and three-probe UIDTable scheme as compute_uid.
   Returns the uid, SUID itself for an empty array, or -1 when the
   array is not long-aligned.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* Both the length and the data pointer must be long-aligned.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;
  /* Seed with the parent uid (or 1 when there is no parent).  */
  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  /* Hash the words from last to first, as in compute_uid.  */
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }
  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;	/* already recorded: no packet write */
  /* New uid: build and emit a Uid_packet.  */
  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);	/* room for the trailing link uid */
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;
  /* Compress */
  /* Copy words first-to-last, peeling them off the rolling hash; stop
     early if a known suffix uid is found and link to it instead.  */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */
  while (ptr < bnd)
    {
      long val = *ptr++;
      *dst++ = val;
      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;	/* known suffix: link to it */
	      break;
	    }
	}
    }
  if (luid)
    {
      /* Append the link uid as little-endian bytes and shrink the packet.  */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);
  return (FrameInfo) uid;
}
/* Unwind the call stack into BUF (at most SIZE bytes), optionally
   bounded by BPTR/EPTR.  ARG is either a ucontext to start from, NULL
   (unwind from here), or the __collector_omp_stack_trace sentinel,
   which also disables the walk when omp_no_walk is set.  Returns the
   number of bytes stored by stack_unwind.  */
int
__collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
{
  int from_omp = (arg == (void*) __collector_omp_stack_trace);
  if (from_omp)
    seen_omp = 1;
  int walk = 1;
  if (arg == NULL || from_omp)
    {
      if (from_omp && omp_no_walk)
	walk = 0;
      ucontext_t *ctx = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (ctx);
      arg = ctx;
    }
  int mode = 0;
  if (!walk)
    mode |= FRINFO_NO_WALK;
  return stack_unwind (buf, size, bptr, eptr, arg, mode);
}
#if ARCH(SPARC)
/*
* These are important data structures taken from the header files reg.h and
* ucontext.h. They are used for the stack trace algorithm explained below.
*
* typedef struct ucontext {
* u_long uc_flags;
* struct ucontext *uc_link;
* usigset_t uc_sigmask;
* stack_t uc_stack;
* mcontext_t uc_mcontext;
* long uc_filler[23];
* } ucontext_t;
*
* #define SPARC_MAXREGWINDOW 31
*
* struct rwindow {
* greg_t rw_local[8];
* greg_t rw_in[8];
* };
*
* #define rw_fp rw_in[6]
* #define rw_rtn rw_in[7]
*
* struct gwindows {
* int wbcnt;
* int *spbuf[SPARC_MAXREGWINDOW];
* struct rwindow wbuf[SPARC_MAXREGWINDOW];
* };
*
* typedef struct gwindows gwindows_t;
*
* typedef struct {
* gregset_t gregs;
* gwindows_t *gwins;
* fpregset_t fpregs;
* long filler[21];
* } mcontext_t;
*
* The stack would look like this when SIGPROF occurrs.
*
* ------------------------- <- high memory
* | |
* | |
* -------------------------
* | |
* ------------------------- <- fp' <-|
* | | |
* : : |
* | | |
* ------------------------- |
* | fp |----------|
* | |
* ------------------------- <- sp'
* | | | |
* | gwins | <- saved stack pointers & | |
* | | register windows | |- mcontext
* ------------------------- | |
* | gregs | <- saved registers | |
* ------------------------- |
* | | |- ucontext
* ------------------------- <- ucp (ucontext pointer) |
* | | |
* | | |- siginfo
* ------------------------- <- sip (siginfo pointer) |
* | |
* ------------------------- <- sp
*
* Then the signal handler is called with:
* handler( signo, sip, uip );
* When gwins is null, all the stack frames are saved in the user stack.
* In that case we can find sp' from gregs and walk the stack for a backtrace.
* However, if gwins is not null we will have a more complicated case.
* Wbcnt(in gwins) tells you how many saved register windows are valid.
* This is important because the kernel does not allocate the entire array.
* And the top most frame is saved in the lowest index element. The next
* paragraph explains the possible causes.
*
* There are two routines in the kernel to flush out user register windows.
* flush_user_windows and flush_user_windows_to_stack
* The first routine will not cause a page fault. Therefore if the user
* stack is not in memory, the register windows will be saved to the pcb.
* This can happen when the kernel is trying to deliver a signal and
* the user stack got swap out. The kernel will then build a new context for
* the signal handler and the saved register windows will
* be copied to the ucontext as show above. On the other hand,
* flush_user_windows_to_stack can cause a page fault, and if it failed
* then there is something wrong (stack overflow, misalign).
* The first saved register window does not necessary correspond to the
* first stack frame. So the current stack pointer must be compare with
* the stack pointers in spbuf to find a match.
*
* We will also follow the uc_link field in ucontext to trace also nested
* signal stack frames.
*
*/
/* Dealing with trap handlers.
* When a user defined trap handler is invoked the return address
* (or actually the address of an instruction that raised the trap)
* is passed to the trap handler in %l6, whereas saved %o7 contains
* garbage. First, we need to find out if a particular pc belongs
* to the trap handler, and if so, take the %l6 value from the stack rather
* than %o7 from either the stack or the register.
* There are three possible situations represented
* by the following stacks:
*
* MARKER MARKER MARKER
* trap handler pc __func pc before 'save' __func pc after 'save'
* %l6 %o7 from reg %o7 (garbage)
* ... %l6 trap handler pc
* ... %l6
* ...
* where __func is a function called from the trap handler.
*
* Currently this is implemented to only deal with __misalign_trap_handler
* set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
* macro shows it. A general solution is postponed.
*/
/* Special handling of unwind through the parallel loop barrier code:
*
* The library defines two symbols, __mt_EndOfTask_Barrier_ and
* __mt_EndOfTask_Barrier_Dummy_ representing the first word of
* the barrier sychronization code, and the first word following
* it. Whenever the leaf PC is between these two symbols,
* the unwind code is special-cased as follows:
* The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
* function, so its return address is in a register, not saved on
* the stack.
*
* MARKER
* __mt_EndOfTask_Barrier_ PC -- the leaf PC
* loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
* this address is taken from the %O0 register
* {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
* ...
*
* With this trick, the analyzer will show the time in the barrier
* attributed to the loop at the end of which the barrier synchronization
* is taking place. That loop body routine, will be shown as called
* from the function from which it was extracted, which will be shown
* as called from the real caller, either the slave or master library routine.
*/
/*
* These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
*
* Note that 0x82 is ASI_PNF. See
* http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
* ASI address space identifier; PNF primary no fault
*/
/* load an int from an address */
/* if the address is illegal, return a 0 */
/* Uses a no-fault load (ASI 0x82, ASI_PNF): a faulting address yields
   0 silently instead of delivering a signal to the process.  */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );
  return val;
}
/* check if an address is invalid
*
* A no-fault load of an illegal address still faults, but it does so silently to the calling process.
* It returns a 0, but so could a load of a legal address.
* So, we time the load. A "fast" load must be a successful load.
* A "slow" load is probably a fault.
* Since it could also be a cache/TLB miss or other abnormality,
* it's safest to retry a slow load.
* The cost of trying a valid address should be some nanosecs.
* The cost of trying an invalid address up to 10 times could be some microsecs.
*/
#if 0
/* Currently unused (see #if 0): heuristic address-validity check.
   Times a no-fault byte load; a fast load must have succeeded, while
   a slow one is probably a fault.  Retried because a slow load could
   also be a cache/TLB miss (see the comment block above).  */
static
int invalid_SPARC_addr(void *addr)
{
  long t1, t2;
  int i;
  for (i=0; i<10; i++) {
    __asm__ __volatile__(
			 "rd %%tick, %0\n\t"
			 "lduba [%2] 0x82, %%g0\n\t"
			 "rd %%tick, %1\n\t"
			 : "=r" (t1), "=r" (t2)
			 : "r" (addr) );
    /* Under 100 ticks: the load completed quickly, so the address
       is considered valid.  */
    if ( (t2 - t1) < 100 )
      return 0;
  }
  return 1;	/* every attempt was slow: assume the address faults */
}
#endif
/*
* The standard SPARC procedure-calling convention is that the
* calling PC (for determining the return address when the procedure
* is finished) is placed in register %o7. A called procedure
* typically executes a "save" instruction that shifts the register
* window, and %o7 becomes %i7.
*
* Optimized leaf procedures do not shift the register window.
* They assume the return address will remain %o7. So when
* we process a leaf PC, we walk instructions to see if there
* is a call, restore, or other instruction that would indicate
* we can IGNORE %o7 because this is NOT a leaf procedure.
*
* If a limited instruction walk uncovers no such hint, we save
* not only the PC but the %o7 value as well... just to be safe.
* Later, in DBE post-processing of the call stacks, we decide
* whether any recorded %o7 value should be used as a caller
* frame or should be discarded.
*/
#define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
#define IS_SAVE(x) (((x) & 0xc1f80000) == 0x81e00000)
#define IS_MOVO7R(x) (((x) & 0xc1f8201f) == 0x8160000f)
#define IS_MOVRO7(x) (((x) & 0xfff82000) == 0x9f600000)
#define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
#define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
#define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
#define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
#define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
#define IS_RET(x) ((x) == 0x81c7e008)
#define IS_RETL(x) ((x) == 0x81c3e008)
#define IS_RETURN(x) (((x) & 0xc1f80000) == 0x81c80000)
#define IS_BRANCH(x) ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
#define IS_CALL(x) (((x) & 0xc0000000) == 0x40000000)
#define IS_LDO7(x) (((x) & 0xfff80000) == 0xde000000)
static long pagesize = 0;
/*
 * Analyze the instructions around the sampled PC to handle leaf functions
 * (functions that have not executed a "save" and so have no register
 * window of their own).  Records the PC into lbuf[], and, when the %o7
 * value from the signal context looks like a live return address, records
 * SP_LEAF_CHECK_MARKER + PC + %o7 instead, so that DBE post-processing
 * can decide whether to trust %o7 as a caller frame (see comment above).
 * Returns the updated index into lbuf.
 */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);
  /* omazur: TBR START -- not used */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
        lbuf[ind++] = pc;
      if (ind < lsize)
        lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
        lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Scan forward over at most 20 instruction words starting at the PC.  */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
        break;
      else if (IS_SAVE (instr))
        {
          /* A "save" lies ahead: we are in a prologue and %o7 still
             holds the caller's return address, so record it.  */
          if (ind < lsize)
            lbuf[ind++] = pc;
          if (o7 && ind < lsize)
            lbuf[ind++] = o7;
          return ind;
        }
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
        break;  /* %o7 is apparently copied elsewhere; stop scanning */
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
        {
          /* %o7 will be rewritten from register rs2; take that register's
             current value as the candidate return address (0 when rs2 is
             not readable from the context).  */
          int rs2 = (instr & 0x1f) + REG_G1 - 1;
          o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
          break;
        }
      else if (IS_RESTORE (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RETURN (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RET (instr))
        {
          o7 = 0;
          break;
        }
      else if (IS_RETL (instr))
        {
          /* process delay slot */
          instr = *instrp++;
          if (IS_RESTORE (instr))
            o7 = 0;
          break;
        }
      else if (IS_BRANCH (instr))
        {
          /* Branch found before anything conclusive: scan backwards from
             the PC for evidence about whether %o7 is live.  */
          unsigned *backbegin = ((unsigned *) pc - 1);
          unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
          while (backbegin > backend)
            {
              // 21920143 stack unwind: SPARC process_leaf backtracks too far
              /*
               * We've already dereferenced backbegin+1.
               * So if backbegin is on the same page, we're fine.
               * If we've gone to a different page, possibly things are not fine.
               * We don't really know how to test that.
               * Let's just assume the worst: that dereferencing backbegin would segv.
               * We won't know if we're in a leaf function or not.
               */
              if (pagesize == 0)
                pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
              if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
                break;
              unsigned backinstr = *backbegin--;
              if (IS_LDO7 (backinstr))
                {
                  o7 = 0;  /* %o7 was reloaded; the context value is stale */
                  break;
                }
              else if (IS_ILLTRAP (backinstr))
                break;
              else if (IS_RETURN (backinstr))
                break;
              else if (IS_RET (backinstr))
                break;
              else if (IS_RETL (backinstr))
                break;
              else if (IS_CALL (backinstr))
                break;
              else if (IS_SAVE (backinstr))
                {
                  o7 = 0;  /* already past a "save": not a leaf */
                  break;
                }
            }
          break;
        }
      else if (IS_CALL (instr))
        o7 = 0;  /* a call ahead means %o7 is about to be clobbered */
    }
#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      //   SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif
  if (o7)
    {
      /* Record the candidate %o7 behind a marker; DBE decides later
         whether to use it as a caller frame.  */
      if (ind < lsize)
        lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
        lbuf[ind++] = pc;
      if (ind < lsize)
        lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
#if WSIZE(64)
// detect signal handler
/* Detect the Linux __rt_sigreturn_stub at address tpc.  If found, recover
   the interrupted registers from the rt_signal_frame the kernel saved on
   the stack (trap npc, trap pc, and frame pointer), record them into
   lbuf, and update *pfp to the frame below the signal frame.  Returns the
   updated lbuf index (unchanged when tpc is not the sigreturn stub).  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
                   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind; // the address is not aligned
  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
        break;
      tpc += 4;
    }
  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;
  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       *   struct sparc_stackf ss;
       *   siginfo_t info;
       *   struct pt_regs regs;
       *   ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       *   unsigned long u_regs[16];
       *   unsigned long tstate;
       *   unsigned long tpc;
       *   unsigned long tnpc;
       *   ....};
       * pc and fp register has offset of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offest is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
        {
          /* Record the extra caller frame first when asked to and the
             recovered frame is still below bptr.  */
          if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
            {
              lbuf[0] = pc1;
              if (ind == 0)
                ind++;
            }
          if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
            {
              if (ind < lsize)
                lbuf[ind++] = (unsigned long) tpc;
              if (ind < lsize)
                lbuf[ind++] = pc;
              if (ind < lsize)
                lbuf[ind++] = pc1;
            }
        }
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
#endif
/*
 * int stack_unwind (char *buf, int size, void *bptr, void *eptr,
 *                   ucontext_t *context, int mode)
 * Walks the SPARC stack frames starting from the context's stack pointer
 * and records return addresses into buf.  bptr/eptr, when non-NULL, bound
 * the range of frames recorded; mode selects FRINFO_FROM_STACK handling.
 * We assume the frame pointer and return address are null at the
 * top-most frame.  Returns the number of bytes written to buf.
 */
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);
  struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
  greg_t pc; /* program counter */
  int extra_frame = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;
  /* BUGFIX: this initialization block used to appear twice, redeclaring
     'extra_frame' and 'ind' in the same scope (a compile error) and
     invoking process_leaf a second time.  The duplicate was removed.  */
  int ind = 0;
  if (bptr == NULL)
    ind = process_leaf (lbuf, ind, lsize, context);
  while (fp)
    {
      if (ind >= lsize)
        break;
      fp = (struct frame *) ((char *) fp + STACK_BIAS);
      if (eptr && fp >= (struct frame *) eptr)
        {
          /* Walked past the caller-supplied end pointer: drop the last
             recorded pair and stop.  */
          ind = ind >= 2 ? ind - 2 : 0;
          break;
        }
#if WSIZE(64) // detect signal handler
      unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
      struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
      int old_ind = ind;
      ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
      if (ind != old_ind)
        {
          /* A signal frame was resolved; continue from the frame below it.  */
          pc = (greg_t) tpc;
          fp = tfp;
        }
      else
#endif
        {
#if WSIZE(64)
          /* NOTE(review): when ind == 0 this reads lbuf[-1]; confirm that
             callers always record at least one entry before this point.  */
          if (IN_TRAP_HANDLER (lbuf[ind - 1]))
            pc = fp->fr_local[6];
          else
            pc = fp->fr_savpc;
#else
          pc = fp->fr_savpc;
#endif
          fp = fp->fr_savfp;
          if (pc)
            {
              if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
                {
                  lbuf[0] = pc;
                  if (ind == 0)
                    ind++;
                }
              if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
                lbuf[ind++] = pc;
            }
        }
      /* 4616238: _door_return may have a frame that has non-zero
       * saved stack pointer and zero pc
       */
      if (pc == (greg_t) NULL)
        break;
    }
  if (ind >= lsize)
    { /* truncated stack handling */
      ind = lsize - 1;
      lbuf[ind++] = SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
#elif ARCH(Intel)
/* get __NR_<syscall_name> constants */
#include <syscall.h>
/*
* From uts/intel/ia32/os/sendsig.c:
*
* An amd64 signal frame looks like this on the stack:
*
* old %rsp:
* <128 bytes of untouched stack space>
* <a siginfo_t [optional]>
* <a ucontext_t>
* <siginfo_t *>
* <signal number>
* new %rsp: <return address (deliberately invalid)>
*
* The signal number and siginfo_t pointer are only pushed onto the stack in
* order to allow stack backtraces. The actual signal handling code expects the
* arguments in registers.
*
* An i386 SVR4/ABI signal frame looks like this on the stack:
*
* old %esp:
* <a siginfo32_t [optional]>
* <a ucontext32_t>
* <pointer to that ucontext32_t>
* <pointer to that siginfo32_t>
* <signo>
* new %esp: <return address (deliberately invalid)>
*/
/* Field extractors for x86 opcode and ModRM bytes.  On 64-bit they fold
   in the REX.B / REX.R extension bits (locals B and R in the walker).  */
#if WSIZE(32)
#define OPC_REG(x) ((x)&0x7)            /* register encoded in the opcode byte */
#define MRM_REGD(x) (((x)>>3)&0x7)      /* ModRM "reg" (destination) field */
#define MRM_REGS(x) ((x)&0x7)           /* ModRM "r/m" (source) field */
#define RED_ZONE 0
#elif WSIZE(64)
#define OPC_REG(x) (B|((x)&0x7))
#define MRM_REGD(x) (R|(((x)>>3)&0x7))
#define MRM_REGS(x) (B|((x)&0x7))
#define RED_ZONE 16                     /* words: the amd64 128-byte red zone */
#endif
#define MRM_EXT(x) (((x)>>3)&0x7)       /* ModRM opcode-extension field */
#define MRM_MOD(x) ((x)&0xc0)           /* ModRM addressing-mode field */
/* Register numbers the walker cares about.  */
#define RAX 0
#define RDX 2
#define RSP 4
#define RBP 5
/* Working state for one speculative x86 instruction walk.  Several such
   contexts may be live at once (up to MAXCTX) when the walk forks at
   branches; see find_i386_ret_addr.  */
struct AdvWalkContext
{
  unsigned char *pc;        /* current instruction pointer of this walk */
  unsigned long *sp;        /* simulated stack pointer */
  unsigned long *sp_safe;   /* lowest stack address considered readable */
  unsigned long *fp;        /* simulated frame pointer */
  unsigned long *fp_sav;    /* saved frame-pointer value -- presumably from a push; confirm at use sites */
  unsigned long *fp_loc;    /* stack slot where fp was saved -- TODO confirm */
  unsigned long rax;        /* tracked %rax value (checked for syscall numbers) */
  unsigned long rdx;        /* tracked %rdx value */
  unsigned long ra_sav;     /* return-address value captured during the walk */
  unsigned long *ra_loc;    /* stack slot that holds ra_sav (see process_return_real) */
  unsigned long regs[16];   /* tracked general-register values (see getRegVal) */
  int tidx; /* targets table index */
  uint32_t cval; /* cache value */
};
/* Fetch the tracked value of register r from the walk context.  A zero
   entry is treated as "unknown": for RBP we substitute the simulated
   frame pointer, otherwise we flag *undefRez and return the zero.  */
static unsigned long
getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
{
  unsigned long val = cur->regs[r];
  if (val == 0 && r == RBP)
    {
      tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n",
               (unsigned long) cur->fp, (unsigned long) cur->pc);
      return (unsigned long) cur->fp;
    }
  if (val == 0)
    *undefRez = 1;
  tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n",
           r, (unsigned long) val, (unsigned long) cur->pc);
  return val;
}
/* Skip over a ModRM byte and everything it implies (optional SIB byte
   and 8- or 32-bit displacement).  Returns the address of the first
   byte after the full addressing form.  */
static unsigned char *
check_modrm (unsigned char *pc)
{
  unsigned char modrm = *pc++;
  unsigned char mod = MRM_MOD (modrm);
  if (mod == 0xc0)
    return pc;                  /* register-direct: nothing follows */
  unsigned char rm = modrm & 0x07;
  if (rm == RSP)
    {
      /* A SIB byte follows the ModRM byte.  */
      switch (mod)
        {
        case 0x40:
          return pc + 2;        /* SIB + disp8 */
        case 0x80:
          return pc + 5;        /* SIB + disp32 */
        default:
          return pc + 1;        /* SIB only */
        }
    }
  switch (mod)
    {
    case 0x00:
      if (rm == RBP)
        pc += 4;                /* disp32 (RIP-relative on amd64) */
      break;
    case 0x40:
      pc += 1;                  /* disp8 */
      break;
    case 0x80:
      pc += 4;                  /* disp32 */
      break;
    }
  return pc;
}
/* Read a little immediate operand of width w (1, 2, or any other value
   meaning 4 bytes) at pc, sign-extended to int.  */
static int
read_int (unsigned char *pc, int w)
{
  switch (w)
    {
    case 1:
      return *((char *) pc);
    case 2:
      return *(short*) pc;
    default:
      return *(int*) pc;
    }
}
/* Return codes for the x86 return-address resolution routines.  */
enum
{
  RA_FAILURE = 0,     /* could not determine a return address */
  RA_SUCCESS,         /* wctx advanced to the caller's frame */
  RA_END_OF_STACK,    /* a null RA was found: top of the stack */
  RA_SIGRETURN,       /* stopped at a sigreturn syscall (32-bit only) */
  RA_RT_SIGRETURN     /* stopped at an rt_sigreturn syscall */
};
/* Cache value encodings */
static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
/* Limits for the speculative instruction walk.  */
#define MAXCTX 16       /* max simultaneous walk contexts */
#define MAXTRGTS 64     /* max recorded branch targets */
#define MAXJMPREG 2     /* max tracked "jmp *reg" contexts */
#define MAXJMPREGCTX 3
/* Drop the current walk context by overwriting it with the last one.  */
#define DELETE_CURCTX() __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
/**
 * Look for wctx->pc in AddrTable_RA_FROMFP and AddrTable_RA_EOSTCK.
 * On a RA_FROMFP hit, recover the caller's frame through the frame
 * pointer (after validating it) and advance wctx; on an RA_EOSTCK hit,
 * report end of stack.
 * @param wctx  current walk context (pc, sp, fp, segment bounds)
 * @return RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE on miss or failed
 *         validation
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
    {
      uint64_t idx = wctx->pc % ValTableSize;
      addr = AddrTable_RA_FROMFP[ idx ];
      if (addr == wctx->pc)
        { // Found in AddrTable_RA_FROMFP
          unsigned long *sp = NULL;
          unsigned long fp = wctx->fp;
          /* validate fp before use */
          if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
            return RA_FAILURE;
          sp = (unsigned long *) fp;
          fp = *sp++;                   /* saved frame pointer */
          unsigned long ra = *sp++;     /* saved return address */
          unsigned long tbgn = wctx->tbgn;
          unsigned long tend = wctx->tend;
          /* the RA must fall in a known text segment */
          if (ra < tbgn || ra >= tend)
            if (!__collector_check_segment (ra, &tbgn, &tend, 0))
              return RA_FAILURE;
          /* adjust the raw RA (presumably to the call site; see
             adjust_ret_addr) */
          unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
          if (npc == 0)
            return RA_FAILURE;
          DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
          wctx->pc = npc;
          wctx->sp = (unsigned long) sp;
          wctx->fp = fp;
          wctx->tbgn = tbgn;
          wctx->tend = tend;
          return RA_SUCCESS;
        }
    }
  if (NULL == AddrTable_RA_EOSTCK)
    return RA_FAILURE;
  uint64_t idx = wctx->pc % ValTableSize;
  addr = AddrTable_RA_EOSTCK[ idx ];
  if (addr != wctx->pc)
    return RA_FAILURE;
  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
  return RA_END_OF_STACK;
}
/**
 * Record wctx->pc in the cache selected by val (RA_FROMFP or RA_EOSTCK),
 * invalidating any stale entry for the same pc in the other table so the
 * two caches never disagree about one address.
 * @param wctx  walk context whose pc is being cached
 * @param val   which cache to update; other values are ignored
 */
static void
cache_put (struct WalkContext *wctx, const uint32_t val)
{
  if (val == RA_FROMFP)
    {
      if (AddrTable_RA_FROMFP == NULL)
        return;
      uint64_t slot = wctx->pc % ValTableSize;
      AddrTable_RA_FROMFP[ slot ] = wctx->pc;
      /* drop a conflicting end-of-stack entry for this pc */
      if (AddrTable_RA_EOSTCK != NULL
          && AddrTable_RA_EOSTCK[ slot ] == wctx->pc)
        AddrTable_RA_EOSTCK[ slot ] = 0;
    }
  else if (val == RA_EOSTCK)
    {
      if (AddrTable_RA_EOSTCK == NULL)
        return;
      uint64_t slot = wctx->pc % ValTableSize;
      AddrTable_RA_EOSTCK[ slot ] = wctx->pc;
      /* drop a conflicting from-frame-pointer entry for this pc */
      if (AddrTable_RA_FROMFP != NULL
          && AddrTable_RA_FROMFP[ slot ] == wctx->pc)
        AddrTable_RA_FROMFP[ slot ] = 0;
    }
}
/* Pop a return address from the simulated stack in cur, validate it, and
   advance wctx to the caller's frame.  cache_on enables recording
   RA_FROMFP / RA_EOSTCK results in the address caches.  Returns
   RA_SUCCESS, RA_END_OF_STACK, or RA_FAILURE.  */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  /* the simulated sp must lie within the real stack */
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
                cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }
  unsigned long ra;
  if (cur->sp == cur->ra_loc)
    {
      /* sp points at the slot whose value we already captured */
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  if (ra == 0)
    {
      /* null RA marks the top of the stack */
      if (cache_on)
        cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }
  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
        {
          DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
                    ra, wctx->tbgn, wctx->tend);
          return RA_FAILURE;
        }
    }
  if (cur->cval == RA_FROMFP)
    {
      /* the walk believed fp was trustworthy: confirm the slot layout
         before caching that fact */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
        {
          if (cache_on)
            cache_put (wctx, RA_FROMFP);
        }
      else
        cur->cval = 0;
    }
  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
        {
          /* We have another evidence that we can trust this RA */
          DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
          wctx->pc = ra;
        }
      else
        {
          DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
          return RA_FAILURE;
        }
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
/* Resolve the next return address with result caching enabled; a thin
   wrapper around process_return_real.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  const int enable_cache = 1;
  int rc = process_return_real (wctx, cur, enable_cache);
  return rc;
}
/* Record in the OpenMP unwind cache the mapping from the pre-walk context
   (*wctx_pc_save) to the resolved result (val and *wctx), keyed by
   wctx_pc_save->pc.  Also stores the return address found on the stack so
   that later lookups can validate the cached entry (see find_i386_ret_addr).
   Allocates the cache tables lazily when omp_no_walk is set.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
               struct WalkContext *wctx, uint32_t val)
{
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      /* first use: allocate all four tables */
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;  /* allocation failed or caching disabled */
#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* Determine where to read the validation RA from: the frame pointer
     when it looks sane, otherwise the slot just below the resolved sp.  */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
        {
          sp = (unsigned long *) (wctx->sp);
          sp--;
          TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
        }
      else
        {
          TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
          sp = (unsigned long *) fp;
          from_fp = 1;
        }
    }
  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;
  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* fall back to the sp-derived slot when the fp-derived RA is not
         in the text segment */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
        {
          sp = (unsigned long *) (wctx->sp);
          sp--;
          ra = *sp++;
        }
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
/*
 * See bug 17166877 - malloc_internal unwind failure.
 * Compilers sometimes emit several call instructions in a row right
 * after "leave; ret":
 *     leave
 *     ret
 *     call xxx
 *     call xxxx
 *     call xxxxx
 * If such calls are also jump targets, we should better not create new
 * jump contexts for them, since a walk started there may end up in some
 * other function.  Returns 1 when npc points at a call inside such a
 * run (at least 3 calls, preceded by leave/ret), else 0.
 */
static int
is_after_ret (unsigned char * npc)
{
  enum { CALL_OP = 0xe8, STEP = 5, MAX_STEPS = 10, MIN_CALLS = 3 };
  if (*npc != CALL_OP)
    return 0;
  unsigned char *fwd = npc;
  int ncall = 1;
  int k;
  /* walk backwards over consecutive 5-byte call instructions */
  for (k = 0; k < MAX_STEPS && *(npc - STEP) == CALL_OP; k++)
    {
      npc -= STEP;
      ncall++;
    }
  /* the run must be preceded by "leave; ret" (0xc9 0xc3) */
  if (*(npc - 1) != 0xc3 || *(npc - 2) != 0xc9)
    return 0;
  /* walk forwards over the remaining calls in the run */
  for (k = 0; k < MAX_STEPS && *(fwd + STEP) == CALL_OP; k++)
    {
      fwd += STEP;
      ncall++;
    }
  return ncall >= MIN_CALLS ? 1 : 0;
}
static int
find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
{
if (wctx->sp == 0)
// Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
return RA_FAILURE;
/* Check cached values */
int retc = cache_get (wctx);
if (retc != RA_FAILURE)
return retc;
/* An attempt to perform code analysis for call stack tracing */
unsigned char opcode;
unsigned char extop;
unsigned char extop2;
unsigned char modrm;
int imm8; /* immediate operand, byte */
int immv; /* immediate operand, word(2) or doubleword(4) */
int reg; /* register code */
/* Buffer for branch targets (analysis stoppers) */
unsigned char *targets[MAXTRGTS];
int ntrg = 0; /* number of entries in the table */
targets[ntrg++] = (unsigned char*) wctx->pc;
targets[ntrg++] = (unsigned char*) - 1;
struct AdvWalkContext buf[MAXCTX];
struct AdvWalkContext *cur = buf;
CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
cur->pc = (unsigned char*) wctx->pc;
cur->sp = (unsigned long*) wctx->sp;
cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
cur->fp = (unsigned long*) wctx->fp;
cur->tidx = 1;
DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
int nctx = 1; /* number of contexts being processed */
int cnt = 8192; /* number of instructions to analyse */
/*
* The basic idea of our x86 stack unwind is that we don't know
* if we can trust the frame-pointer register. So we walk
* instructions to find a return instruction, at which point
* we know the return address is on the top of the stack, etc.
*
* A severe challenge to walking x86 instructions is when we
* encounter "jmp *(reg)" instructions, where we are expected
* to jump to the (unknown-to-us) contents of a register.
*
* The "jmp_reg" code here attempts to keep track of the
* context for such a jump, deferring any handling of such
* a difficult case. We continue with other contexts, hoping
* that some other walk will take us to a return instruction.
*
* If no other walk helps, we return to "jmp_reg" contexts.
* While we don't know the jump target, it is possible that the
* bytes immediately following the jmp_reg instruction represent
* one possible target, as might be the case when a "switch"
* statement is compiled.
*
* Unfortunately, the bytes following a "jmp_reg" instruction might
* instead be a jump target from somewhere else -- execution might
* never "fall through" from the preceding "jmp_reg". Those bytes
* might not even be instructions at all. There are many uses of
* jmp_reg instructions beyond just compiling switch statements.
*
* So walking the bytes after a "jmp_reg" instruction can lead
* to bugs and undefined behavior, including SEGV and core dump.
*
* We currently do not really understand the "jmp_reg" code below.
*/
int jmp_reg_switch_mode = 0;
int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
int total_num_jmp_reg = 0; // number of total jmp *reg met
struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
int cur_jmp_reg_switch = 0; // current switch table
int num_jmp_reg_switch = 0; // number of switch table
int jmp_reg_switch_case = 0; // case number in current switch table
unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
int max_jmp_reg_switch_case = 2;
#if WSIZE(32)
int max_switch_pc_offset = 512;
#else // WSIZE(64)
int max_switch_pc_offset = 1024;
#endif
int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
struct WalkContext wctx_pc_save;
if (do_walk == 0)
// do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
__collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
startWalk:
if (do_walk == 0)
{ // try to resolve RA from stack frame pointer
if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
{
do_walk = 1;
goto startWalk;
}
// before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
uint64_t idx = wctx->pc * ROOT_IDX;
uint32_t val = OmpVals[idx % OmpValTableSize];
idx = (idx + val) * ROOT_IDX;
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
// Check ra: if it is 0 - then cache is invalid
uint64_t idx4;
idx4 = (idx + val) * ROOT_IDX;
idx4 = (idx4 + val) * ROOT_IDX;
if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache
goto checkFP;
#endif
struct WalkContext saved_ctx;
__collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
if (wctx->pc == saved_ctx.pc
&& wctx->sp == saved_ctx.sp
&& wctx->fp == saved_ctx.fp
&& wctx->tbgn == saved_ctx.tbgn
&& wctx->tend == saved_ctx.tend)
{ // key match, RA may be valid
idx = (idx + val) * ROOT_IDX;
unsigned long *sp = NULL;
unsigned long fp = wctx->fp;
int from_fp = 0;
if (val == RA_END_OF_STACK)
{
DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
__collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
return val;
}
else
{
if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
{
TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
sp--;
if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
{
goto checkFP;
}
unsigned long ra = *sp;
uint64_t idx2 = (idx + val) * ROOT_IDX;
if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
{
__collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
return val;
}
TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
goto checkFP;
}
sp = (unsigned long *) fp;
from_fp = 1;
}
uint64_t idx2 = (idx + val) * ROOT_IDX;
unsigned long ra = *sp++;
if (from_fp)
{
unsigned long tbgn = wctx->tbgn;
unsigned long tend = wctx->tend;
if (ra < tbgn || ra >= tend)
{
sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
sp--;
//if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
// The check above was replaced with the check below,
// because we do not know why "- 16" and "- sizeof(*sp)" was used.
if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
goto checkFP;
else
ra = *sp;
}
}
if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
{
TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
__collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
return val;
}
}
goto checkFP;
}
else
{
CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
}
while (cnt--)
{
if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
{ // no context available, try jmp switch mode
int i = 0;
if (num_jmp_reg == expected_num_jmp_reg)
jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
// the ideal asm of switch is
// jmp reg
// ...//case 1
// ret
// ...//case 2
// ret
// ...//etc
if (jmp_reg_switch_mode == 0)
{
num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
jmp_reg_switch_mode = 1; // begin switch mode
for (i = 0; i < num_jmp_reg_switch; i++)
{
if (jmp_reg_switch_ctx[i] == NULL)
jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
if (jmp_reg_switch_ctx[i] != NULL)
{ // backup jmp_reg_ctx
__collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
cur_jmp_reg_switch = 0; // reset the current switch table
jmp_reg_switch_case = 0; // reset the case number in current switch table
}
}
if (jmp_reg_switch_backup_ctx == NULL)
{ // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
if (jmp_reg_switch_backup_ctx != NULL)
__collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
}
}
if (jmp_reg_switch_mode == 1)
{ // in the process of trying switch cases
if (cur_jmp_reg_switch == num_jmp_reg_switch)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
if (jmp_reg_switch_backup_ctx != NULL)
__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
int rc = process_return_real (wctx, cur, 0);
if (rc == RA_SUCCESS)
{
if (save_ctx)
omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
return rc;
}
break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
}
unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
if (jmp_reg_switch_case == 0)
// first switch case
npc = check_modrm (npc); // pc next to "jmp reg" instruction
else if (jmp_reg_switch_pc != NULL)
npc = jmp_reg_switch_pc; // // pc next to "ret" instruction of previous case
else
{
DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
jmp_reg_switch_case, jmp_reg_switch_pc);
break; //goto checkFP
}
jmp_reg_switch_base = npc;
struct AdvWalkContext *new = buf + nctx;
nctx += 1;
__collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
new->pc = npc;
cur = new; /* advance the new context first */
jmp_reg_switch_pc = NULL;
jmp_reg_switch_case++;
if (jmp_reg_switch_case == max_jmp_reg_switch_case)
{ // done many cases, change to another switch table
cur_jmp_reg_switch++;
jmp_reg_switch_case = 0;
}
}
num_jmp_reg = 0;
}
if (jmp_reg_switch_mode == 1)
{ // when processing switch cases, check pc each time
unsigned long tbgn = wctx->tbgn;
unsigned long tend = wctx->tend;
if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
break;
}
if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
if (jmp_reg_switch_backup_ctx != NULL)
__collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
int rc = process_return_real (wctx, cur, 0);
if (rc == RA_SUCCESS)
{
if (save_ctx)
omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
return rc;
}
break; // limit the walk offset after jmp reg instruction, got checkFP
}
}
if (nctx == 0)
break;
// dump_targets (__LINE__, ntrg, targets);
while (cur->pc > targets[cur->tidx])
cur->tidx += 1;
if (cur->pc == targets[cur->tidx])
{
/* Stop analysis. Delete context. */
if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
{
if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
__LINE__, cur->pc, jmp_reg_switch_pc, nctx);
jmp_reg_switch_pc = cur->pc; // save cp before delete context, may be used as a start of switch case
jmp_reg_switch_pc_old = jmp_reg_switch_pc;
}
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
DELETE_CURCTX ();
if (cur >= buf + nctx)
cur = buf;
continue;
}
if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
}
/* let's walk the next x86 instruction */
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
__LINE__, (long) (cur - buf), (unsigned long) cur->pc,
(int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
(int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
(int) cur->pc[6], (unsigned long) cur->sp);
int v = 4; /* Operand size */
int a = 4; /* Address size */
/* int W = 0; REX.W bit */
#if WSIZE(64)
int R = 0; /* REX.R bit */
#endif
int X = 0; /* REX.X bit */
int B = 0; /* REX.B bit */
/* Check prefixes */
int done = 0;
while (!done)
{
opcode = *cur->pc++;
switch (opcode)
{
case 0x66: /* opd size override */
v = 2;
break;
case 0x67: /*addr size override */
a = 2;
break;
#if WSIZE(64)
case 0x40: /* REX */
case 0x41:
case 0x42:
case 0x43:
case 0x44:
case 0x45:
case 0x46:
case 0x47:
case 0x48:
case 0x49:
case 0x4a:
case 0x4b:
case 0x4c:
case 0x4d:
case 0x4e:
case 0x4f:
B = (opcode & 0x1) ? 8 : 0;
X = (opcode & 0x2) ? 8 : 0;
R = (opcode & 0x4) ? 8 : 0;
if (opcode & 0x8) /* 64 bit operand size */
v = 8;
opcode = *cur->pc++;
done = 1;
break;
#endif
default:
done = 1;
break;
}
}
int z = (v == 8) ? 4 : v;
switch (opcode)
{
case 0x0: /* add Eb,Gb */
case 0x01: /* add Ev,Gv */
case 0x02: /* add Gb,Eb */
case 0x03: /* add Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x04: /* add %al,Ib */
cur->pc += 1;
break;
case 0x05: /* add %eax,Iz */
cur->pc += z;
break;
case 0x06: /* push es */
cur->sp -= 1;
break;
case 0x07: /* pop es */
cur->sp += 1;
if (cur->sp - RED_ZONE > cur->sp_safe)
cur->sp_safe = cur->sp - RED_ZONE;
break;
case 0x08: /* or Eb,Gb */
case 0x09: /* or Ev,Gv */
case 0x0a: /* or Gb,Eb */
case 0x0b: /* or Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x0c: /* or %al,Ib */
cur->pc += 1;
break;
case 0x0d: /* or %eax,Iz */
cur->pc += z;
break;
case 0x0e: /* push cs */
cur->sp -= 1;
break;
case 0x0f: /* two-byte opcodes */
extop = *cur->pc++;
switch (extop)
{ /* RTM or HLE */
case 0x01:
extop2 = *cur->pc;
switch (extop2)
{
case 0xd5: /* xend */
case 0xd6: /* xtest */
cur->pc++;
break;
default:
break;
}
break;
case 0x03:
cur->pc = check_modrm (cur->pc);
break;
case 0x0b:
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
__LINE__, (int) opcode);
DELETE_CURCTX ();
break;
case 0x05: /* syscall */
case 0x34: /* sysenter */
if (cur->rax == __NR_exit)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
__LINE__, (int) opcode);
DELETE_CURCTX ();
break;
}
else if (cur->rax == __NR_rt_sigreturn)
{
if (jmp_reg_switch_mode == 1)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
__LINE__, (int) opcode);
goto checkFP;
}
wctx->sp = (unsigned long) cur->sp;
if (save_ctx)
omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
return RA_RT_SIGRETURN;
}
#if WSIZE(32)
else if (cur->rax == __NR_sigreturn)
{
if (jmp_reg_switch_mode == 1)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
goto checkFP;
}
wctx->sp = (unsigned long) cur->sp;
if (save_ctx)
omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
return RA_SIGRETURN;
}
#endif
/* Check for Linus' trick in the vsyscall page */
while (*cur->pc == 0x90) /* nop */
cur->pc++;
if (*cur->pc == 0xeb) /* jmp imm8 */
cur->pc += 2;
break;
case 0x0d: /* nop Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x10: /* xmm Vq,Wq */
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
cur->pc = check_modrm (cur->pc);
break;
case 0x18: /* prefetch */
cur->pc = check_modrm (cur->pc);
break;
case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
cur->pc += 2;
break;
case 0x1f: /* nop Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x28: /* xmm Vq,Wq */
case 0x29:
case 0x2a:
case 0x2b:
case 0x2c:
case 0x2d:
case 0x2e:
case 0x2f:
cur->pc = check_modrm (cur->pc);
break;
case 0x30: /* wrmsr */
case 0x31: /* rdtsc */
case 0x32: /* rdmsr */
case 0x33: /* rdpmc */
break;
/* case 0x34: sysenter (see above) */
case 0x38: case 0x3a:
extop2 = *cur->pc++;
cur->pc = check_modrm (cur->pc);
// 21275311 Unwind failure in native stack for java application running on jdk8
// Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
if (extop == 0x3a)
cur->pc++;
break;
case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
case 0x44: case 0x45: case 0x46: case 0x47:
case 0x48: case 0x49: case 0x4a: case 0x4b:
case 0x4c: case 0x4d: case 0x4e: case 0x4f:
cur->pc = check_modrm (cur->pc);
break;
case 0x50: case 0x51: case 0x52: case 0x53:
case 0x54: case 0x55: case 0x56: case 0x57:
case 0x58: case 0x59: case 0x5a: case 0x5b:
case 0x5c: case 0x5d: case 0x5e: case 0x5f:
case 0x60: case 0x61: case 0x62: case 0x63:
case 0x64: case 0x65: case 0x66: case 0x67:
case 0x68: case 0x69: case 0x6a: case 0x6b:
case 0x6c: case 0x6d: case 0x6e: case 0x6f:
cur->pc = check_modrm (cur->pc);
break;
case 0x70: case 0x71: case 0x72: case 0x73:
cur->pc = check_modrm (cur->pc) + 1;
break;
case 0x74: case 0x75: case 0x76:
cur->pc = check_modrm (cur->pc);
break;
case 0x77:
break;
case 0x7c: case 0x7d: case 0x7e: case 0x7f:
cur->pc = check_modrm (cur->pc);
break;
case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
case 0x84: case 0x85: case 0x86: case 0x87:
case 0x88: case 0x89: case 0x8a: case 0x8b:
case 0x8c: case 0x8d: case 0x8e: case 0x8f:
immv = read_int (cur->pc, z);
cur->pc += z;
if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
{
int tidx = 0;
unsigned char *npc = cur->pc + immv;
if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
{
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
__LINE__, (int) opcode);
DELETE_CURCTX ();
break;
}
if (is_after_ret (npc))
break;
while (npc > targets[tidx])
tidx += 1;
if (npc != targets[tidx])
{
if (ntrg < MAXTRGTS)
{
for (int i = 0; i < nctx; i++)
if (buf[i].tidx >= tidx)
buf[i].tidx++;
/* insert a new target */
for (int i = ntrg; i > tidx; i--)
targets[i] = targets[i - 1];
ntrg += 1;
targets[tidx++] = npc;
}
else
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
__LINE__, ntrg);
struct AdvWalkContext *new = buf + nctx;
nctx += 1;
__collector_memcpy (new, cur, sizeof (*new));
new->pc = npc;
new->tidx = tidx;
cur = new; /* advance the new context first */
continue;
}
}
else
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
__LINE__, ntrg);
break;
case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
case 0x94: case 0x95: case 0x96: case 0x97:
case 0x98: case 0x99: case 0x9a: case 0x9b:
case 0x9c: case 0x9d: case 0x9e: case 0x9f:
cur->pc = check_modrm (cur->pc);
break;
case 0xa0: /* push fs */
cur->sp -= 1;
break;
case 0xa1: /* pop fs */
cur->sp += 1;
if (cur->sp - RED_ZONE > cur->sp_safe)
cur->sp_safe = cur->sp - RED_ZONE;
break;
case 0xa2: /* cpuid */
break;
case 0xa3: /* bt Ev,Gv */
cur->pc = check_modrm (cur->pc);
break;
case 0xa4: /* shld Ev,Gv,Ib */
cur->pc = check_modrm (cur->pc);
cur->pc += 1;
break;
case 0xa5: /* shld Ev,Gv,%cl */
cur->pc = check_modrm (cur->pc);
break;
case 0xa8: /* push gs */
cur->sp -= 1;
break;
case 0xa9: /* pop gs */
cur->sp += 1;
if (cur->sp - RED_ZONE > cur->sp_safe)
cur->sp_safe = cur->sp - RED_ZONE;
break;
case 0xaa: /* rsm */
break;
case 0xab: /* bts Ev,Gv */
cur->pc = check_modrm (cur->pc);
break;
case 0xac: /* shrd Ev,Gv,Ib */
cur->pc = check_modrm (cur->pc);
cur->pc += 1;
break;
case 0xad: /* shrd Ev,Gv,%cl */
cur->pc = check_modrm (cur->pc);
break;
case 0xae: /* group15 */
cur->pc = check_modrm (cur->pc);
break;
case 0xaf: /* imul Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0xb1: /* cmpxchg Ev,Gv */
cur->pc = check_modrm (cur->pc);
break;
case 0xb3:
case 0xb6: /* movzx Gv,Eb */
case 0xb7: /* movzx Gv,Ew */
cur->pc = check_modrm (cur->pc);
break;
case 0xba: /* group8 Ev,Ib */
cur->pc = check_modrm (cur->pc);
cur->pc += 1;
break;
case 0xbb: /* btc Ev,Gv */
case 0xbc: /* bsf Gv,Ev */
case 0xbd: /* bsr Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0xbe: /* movsx Gv,Eb */
case 0xbf: /* movsx Gv,Ew */
cur->pc = check_modrm (cur->pc);
break;
case 0xc0: /* xadd Eb,Gb */
case 0xc1: /* xadd Ev,Gv */
cur->pc = check_modrm (cur->pc);
break;
case 0xc2: /* cmpps V,W,Ib */
cur->pc = check_modrm (cur->pc);
cur->pc += 1;
break;
case 0xc3: /* movnti M,G */
cur->pc = check_modrm (cur->pc);
break;
case 0xc6: /* shufps V,W,Ib */
cur->pc = check_modrm (cur->pc);
cur->pc += 1;
break;
case 0xc7: /* RDRAND */
cur->pc = check_modrm (cur->pc);
break;
case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
case 0xcc: case 0xcd: case 0xce: case 0xcf:
break;
case 0xd0: case 0xd1: case 0xd2: case 0xd3:
case 0xd4: case 0xd5: case 0xd6: case 0xd7:
case 0xd8: case 0xd9: case 0xda: case 0xdb:
case 0xdc: case 0xdd: case 0xde: case 0xdf:
case 0xe0: case 0xe1: case 0xe2: case 0xe3:
case 0xe4: case 0xe5: case 0xe6: case 0xe7:
case 0xe8: case 0xe9: case 0xea: case 0xeb:
case 0xec: case 0xed: case 0xee: case 0xef:
case 0xf0: case 0xf1: case 0xf2: case 0xf3:
case 0xf4: case 0xf5: case 0xf6: case 0xf7:
case 0xf8: case 0xf9: case 0xfa: case 0xfb:
case 0xfc: case 0xfd: case 0xfe: case 0xff:
cur->pc = check_modrm (cur->pc);
break;
default:
if (jmp_reg_switch_mode == 1 && extop == 0x0b)
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
__LINE__, (int) extop, jmp_reg_switch_mode);
else
{
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
__LINE__, (int) extop, jmp_reg_switch_mode);
DELETE_CURCTX ();
}
break;
}
break;
case 0x10: /* adc Eb,Gb */
case 0x11: /* adc Ev,Gv */
case 0x12: /* adc Gb,Eb */
case 0x13: /* adc Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x14: /* adc %al,Ib */
cur->pc += 1;
break;
case 0x15: /* adc %eax,Iz */
cur->pc += z;
break;
case 0x16: /* push ss */
cur->sp -= 1;
break;
case 0x17: /* pop ss */
cur->sp += 1;
if (cur->sp - RED_ZONE > cur->sp_safe)
cur->sp_safe = cur->sp - RED_ZONE;
break;
case 0x18: /* sbb Eb,Gb */
case 0x19: /* sbb Ev,Gv */
case 0x1a: /* sbb Gb,Eb */
case 0x1b: /* sbb Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x1c: /* sbb %al,Ib */
cur->pc += 1;
break;
case 0x1d: /* sbb %eax,Iz */
cur->pc += z;
break;
case 0x1e: /* push ds */
cur->sp -= 1;
break;
case 0x1f: /* pop ds */
cur->sp += 1;
if (cur->sp - RED_ZONE > cur->sp_safe)
cur->sp_safe = cur->sp - RED_ZONE;
break;
case 0x20: /* and Eb,Gb */
case 0x21: /* and Ev,Gv */
case 0x22: /* and Gb,Eb */
case 0x23: /* and Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x24: /* and %al,Ib */
cur->pc += 1;
break;
case 0x25: /* and %eax,Iz */
cur->pc += z;
break;
case 0x26: /* seg=es prefix */
break;
case 0x27: /* daa */
break;
case 0x28: /* sub Eb,Gb */
case 0x29: /* sub Ev,Gv */
case 0x2a: /* sub Gb,Eb */
case 0x2b: /* sub Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x2c: /* sub %al,Ib */
cur->pc += 1;
break;
case 0x2d: /* sub %eax,Iz */
cur->pc += z;
break;
case 0x2e: /* seg=cs prefix */
break;
case 0x2f: /* das */
break;
case 0x30: /* xor Eb,Gb */
case 0x31: /* xor Ev,Gv */
case 0x32: /* xor Gb,Eb */
case 0x33: /* xor Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x34: /* xor %al,Ib */
cur->pc += 1;
break;
case 0x35: /* xor %eax,Iz */
cur->pc += z;
break;
case 0x36: /* seg=ss prefix */
break;
case 0x37: /* aaa */
break;
case 0x38: /* cmp Eb,Gb */
case 0x39: /* cmp Ev,Gv */
case 0x3a: /* cmp Gb,Eb */
case 0x3b: /* cmp Gv,Ev */
cur->pc = check_modrm (cur->pc);
break;
case 0x3c: /* cmp %al,Ib */
cur->pc += 1;
break;
case 0x3d: /* cmp %eax,Iz */
cur->pc += z;
break;
case 0x3e: /* seg=ds prefix */
break;
case 0x3f: /* aas */
break;
#if WSIZE(32)
case 0x40: /* inc %eax */
case 0x41: /* inc %ecx */
case 0x42: /* inc %edx */
case 0x43: /* inc %ebx */
break;
case 0x44: /* inc %esp */
/* Can't be a valid stack pointer - delete context */
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
DELETE_CURCTX ();
break;
case 0x45: /* inc %ebp */
case 0x46: /* inc %esi */
case 0x47: /* inc %edi */
case 0x48: /* dec %eax */
case 0x49: /* dec %ecx */
case 0x4a: /* dec %edx */
case 0x4b: /* dec %ebx */
break;
case 0x4c: /* dec %esp */
/* Can't be a valid stack pointer - delete context */
DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
DELETE_CURCTX ();
break;
case 0x4d: /* dec %ebp */
case 0x4e: /* dec %esi */
case 0x4f: /* dec %edi */
break;
#endif
case 0x50: /* push %eax */
case 0x51: /* push %ecx */
case 0x52: /* push %edx */
case 0x53: /* push %ebx */
case 0x54: /* push %esp */
case 0x55: /* push %ebp */
case 0x56: /* push %esi */
case 0x57: /* push %edi */
cur->sp -= 1;
reg = OPC_REG (opcode);
if (reg == RBP)
{
#if 0
/* Don't do this check yet. Affects tail calls. */
/* avoid other function's prologue */
if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
(cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
{
/* mov %esp,%ebp */
DELETE_CURCTX ();
break;
}
#endif
if (cur->fp_loc == NULL)
{
cur->fp_loc = cur->sp;
cur->fp_sav = cur->fp;
}
}
break;
case 0x58: /* pop %eax */
case 0x59: /* pop %ecx */
case 0x5a: /* pop %edx */
case 0x5b: /* pop %ebx */
case 0x5c: /* pop %esp */
case 0x5d: /* pop %ebp */
case 0x5e: /* pop %esi */
case 0x5f: /* pop %edi */
reg = OPC_REG (opcode);
cur->regs[reg] = 0;
if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
cur->regs[reg] = *cur->sp;
DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
__LINE__, reg, (unsigned long) cur->regs[