blob: 5362eeb2179ebc00678225e40bd0ee60063e16d8 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2016 The Fuchsia Authors
#
# Use of this source code is governed by a MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT
"""
This tool will symbolize a crash from Zircon's crash logger, adding
function names and, if available, source code locations (filenames and
line numbers from debug info).
Example usage #1:
./scripts/run-zircon -a x64 | ./scripts/symbolize devmgr.elf --build-dir=build-x64
Example usage #2:
./scripts/symbolize devmgr.elf --build-dir=build-x64
<copy and paste output from Zircon>
Example usage #3 (for zircon kernel output):
./scripts/symbolize --build-dir=build-x64
<copy and paste output from Zircon>
"""
import argparse
import errno
import os
import re
import subprocess
import sys
# Absolute path of the directory containing this script; the default search
# locations below are derived from it.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
# "<parent of SCRIPT_DIR>/prebuilt/downloads"; tool_path() builds the default
# toolchain binary paths underneath this directory.
PREBUILTS_BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(SCRIPT_DIR), "prebuilt",
"downloads"))
# NOTE(review): not referenced anywhere else in the visible part of this file.
GCC_VERSION = '6.3.0'
# Cache filled by find_dso_full_path(): DSO name -> path it resolved to.
name_to_full_path = {}
# Set from the --debug flag in main(); enables extra diagnostic prints.
debug_mode = False
# Paths to various external tools can be provided on the command line.
# If these are None then use the prebuilt location.
addr2line_tool_path = None
gdb_tool_path = None
def find_func(find_args, dirname, names):
    """Directory-walk visitor that records where the wanted file lives.

    Args:
        find_args: dict with a "name" key (entry name being searched for)
            and a "path" key ("" until a match is found). On a match,
            "path" is set to |dirname|.
        dirname: directory currently being visited.
        names: names of the entries inside |dirname|.

    The visit is a no-op once a match has been recorded, and for any
    directory whose path contains "sysroot".
    """
    already_found = find_args["path"] != ""
    inside_sysroot = "sysroot" in dirname
    if already_found or inside_sysroot:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname
def find_file_in_build_dir(name, build_dirs):
    """Search the build directories for a directory entry called |name|.

    Args:
        name: entry name to look for (a bare name, not a path). An empty or
            None name can never match, so None is returned immediately.
        build_dirs: directories to search recursively, in priority order.

    Returns:
        The absolute path of the first match, or None if there is none.
        Directories whose path contains "sysroot" are never searched.
    """
    if not name:
        return None
    for location in build_dirs:
        # os.walk replaces the deprecated os.path.walk (removed in Python 3)
        # and lets the search stop at the first match instead of visiting
        # the rest of the tree.
        for dirname, subdirs, files in os.walk(location):
            if "sysroot" in dirname:
                # Everything below here also has "sysroot" in its path, so
                # do not descend any further.
                subdirs[:] = []
                continue
            if name in files or name in subdirs:
                return os.path.abspath(os.path.join(dirname, name))
    return None
def buildid_to_full_path(buildid, build_dirs):
    """Look up a build id in the ids.txt file of each build directory.

    Each useful line of ids.txt is "<build id> <path>".

    Args:
        buildid: build id string to look for.
        build_dirs: directories that may contain an "ids.txt" file, in
            priority order.

    Returns:
        The path recorded for |buildid| in the first ids.txt that lists it,
        or None if no file lists it.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                # Split on the first run of whitespace only, so a path that
                # contains spaces stays intact; blank or malformed lines are
                # skipped instead of raising ValueError.
                fields = line.strip().split(None, 1)
                if len(fields) != 2:
                    continue
                entry_id, path = fields
                if entry_id == buildid:
                    return path
    return None
def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a bootfs path to its source file using "bootfs.manifest".

    The manifest is located with find_file_in_build_dir() and is read as
    lines of the form "<path in bootfs>=<path on host>".

    Args:
        boot_app_name: path of the program inside bootfs.
        build_dirs: directories searched for the manifest.

    Returns:
        The host path recorded for |boot_app_name|, with a trailing ".strip"
        removed, or None if the manifest or the entry is missing.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if not manifest_path:
        return None
    strip_suffix = ".strip"
    with open(manifest_path) as manifest_file:
        for line in manifest_file:
            # partition() tolerates lines without "=" (skipped) and host
            # paths that themselves contain "=", both of which made the
            # former two-value unpacking raise ValueError.
            out_path, separator, in_path = line.rstrip().partition("=")
            if not separator or out_path != boot_app_name:
                continue
            if in_path.endswith(strip_suffix):
                in_path = in_path[:-len(strip_suffix)]
            return in_path
    return None
def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Resolve the name of a DSO from a backtrace to a file on the host.

    Args:
        dso: module name as it appears in the log.
        exe_name: name of the primary application given on the command
            line, or None.
        name_to_buildid: dict mapping module names to build ids.
        build_dirs: directories to search, in priority order.

    Returns:
        The path of the matching file, or None if nothing was found.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest
        boot_prefix = "app:/boot/"
        if dso.startswith(boot_prefix):
            boot_app_name = dso[len(boot_prefix):]
            found_path = find_file_in_boot_manifest(boot_app_name, build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename
    found_path = find_file_in_build_dir(dso, build_dirs)
    if not found_path and exe_name and not dso.endswith(".so"):
        # If that fails, and this file doesn't end with .so, try the
        # executable name (when one was given; a None name cannot match
        # anything, so there is no point walking the tree for it).
        found_path = find_file_in_build_dir(exe_name, build_dirs)
    if not found_path and dso.startswith("/"):
        # If that still fails and this looks like an absolute path, try the
        # last path component. The "+ 1" drops the "/" itself: a name that
        # still starts with "/" can never equal a directory entry.
        short_name = dso[dso.rfind("/") + 1:]
        found_path = find_file_in_build_dir(short_name, build_dirs)
    return found_path
def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached front end for find_dso_full_path_uncached().

    Successful lookups are remembered in the module-level
    |name_to_full_path| dict, keyed by |dso|; failed lookups are not cached
    and are retried on the next call.

    Returns:
        The resolved path, or whatever falsy value the uncached lookup
        produced when nothing was found.
    """
    try:
        return name_to_full_path[dso]
    except KeyError:
        pass
    resolved = find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs)
    if resolved:
        name_to_full_path[dso] = resolved
    return resolved
def tool_path(arch, tool, user_provided_path):
    """Return the path of the external tool to run.

    Args:
        arch: architecture prefix of the prebuilt tool name.
        tool: tool name, e.g. "addr2line" or "gdb".
        user_provided_path: path given on the command line, or None to use
            the prebuilt toolchain.

    Returns:
        |user_provided_path| when it is not None, otherwise
        "<PREBUILTS_BASE_DIR>/gcc/bin/<arch>-elf-<tool>".

    Raises:
        Exception: the prebuilt location is needed and the host platform is
            neither Linux nor Darwin.
    """
    if user_provided_path is not None:
        return user_provided_path
    # The platform name itself is not part of the path; it is only checked
    # so that unsupported hosts fail loudly here.
    if not sys.platform.startswith(("linux", "darwin")):
        raise Exception("Unsupported platform!")
    return "%s/gcc/bin/%s-elf-%s" % (PREBUILTS_BASE_DIR, arch, tool)
def run_tool(path, *args):
    """Run an external program and capture its standard output.

    Args:
        path: executable to run.
        *args: command-line arguments passed to it.

    Returns:
        The captured output, or False if running the command raised any
        exception (a message describing the failure is printed).
    """
    cmd = [path]
    cmd.extend(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        return subprocess.check_output(cmd)
    except Exception as e:
        print("Calling %s failed: command %s error %s" % (os.path.basename(path), cmd, e))
        return False
# Note: addr2line requires hex addresses.
# |addr_as_hex_string| must already be PIE-adjusted.
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Run addr2line with "-Cipfe" on |elf_path| for a single address.

    The tool is the one named by --addr2line when given, otherwise the
    prebuilt one for |arch|.

    Returns:
        Whatever run_tool() returns: the tool's output, or False on failure.
    """
    addr2line = tool_path(arch, "addr2line", addr2line_tool_path)
    return run_tool(addr2line, "-Cipfe", elf_path, addr_as_hex_string)
def run_gdb(arch, *args):
    """Run gdb with the given arguments.

    The tool is the one named by --gdb when given, otherwise the prebuilt
    one for |arch|.

    Returns:
        Whatever run_tool() returns: the tool's output, or False on failure.
    """
    gdb = tool_path(arch, "gdb", gdb_tool_path)
    return run_tool(gdb, *args)
def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that a return address belongs to.

    Args:
        arch: architecture used to pick the addr2line binary.
        elf_path: ELF file to symbolize against.
        addr_as_hex_string: return address, as a hex string.

    Returns:
        The result of run_addr2line() for the adjusted address.
    """
    return_addr = int(addr_as_hex_string, 16)
    # Subtract 1 to convert from a return address to a call site
    # address. (To be more exact, this converts to an address that
    # is within the call site instruction.) This adjustment gives
    # more correct results in the presence of inlining and
    # 'noreturn' functions. (See ZX-842.)
    call_site = "0x%x" % (return_addr - 1)
    return run_addr2line(arch, elf_path, call_site)
# On BSD platforms there are cases where writing to stdout can return EAGAIN.
# In that event, retry the line again. This only manifests itself when piping
# qemu's stdout directly to this script.
def writelines(lines):
    """Write every string in |lines| to stdout via writeline(), in order."""
    for text in lines:
        writeline(text)
def writeline(line):
    """Write |line| to stdout, retrying for as long as the write hits EAGAIN.

    Any other IOError ends the attempt without re-raising, so the text is
    dropped in that case.
    """
    finished = False
    while not finished:
        try:
            sys.stdout.write(line)
            finished = True
        except IOError as e:
            # Only EAGAIN is worth another attempt.
            finished = e.errno != errno.EAGAIN
# Offset the address based on binary code start and bias.
def kaslr_offset(addr, code_start, bias):
    """Subtract the (bias - code_start) displacement from |addr|.

    Args:
        addr: address, either a number or a str holding a hex number.
        code_start: code start address of the binary; falsy if unknown.
        bias: load bias; falsy if unknown.

    Returns:
        |addr| unchanged when |code_start| or |bias| is falsy. Otherwise the
        adjusted address in the same type as the input: a str input gives a
        hex string without a "0x" prefix, any other input gives a number.
    """
    if not (code_start and bias):
        return addr
    displacement = bias - code_start
    if isinstance(addr, str):
        return '%x' % (int(addr, 16) - displacement)
    return addr - displacement
def _parse_args():
    """Define the command-line interface and parse sys.argv."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of the default (out/x64)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--stack_size", "-s", type=int,
                        default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or "
                        "mtl default thread size")
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("--addr2line", default=None,
                        help="Path of addr2line program to use")
    parser.add_argument("--gdb", default=None,
                        help="Path of gdb program to use")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    return parser.parse_args()


def _resolve_build_dirs(user_build_dirs):
    """Return the directories to search for binaries, in priority order."""
    source_root = os.path.dirname(SCRIPT_DIR)
    zircon_build_dir = os.path.join(source_root, "build-x64")
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            source_root, os.pardir, "out", "build-zircon", "build-x64")
    if user_build_dirs:
        return user_build_dirs + [zircon_build_dir]
    # Put the unstripped path ahead of the stripped one, we want the
    # former searched first. This does mean the unstripped directory
    # will get searched twice, but relative to the entire search time,
    # the addition is small.
    # Plus once a file is found its location is cached.
    # Plus this is only used as a fallback in case the file isn't found
    # in ids.txt.
    fuchsia_build_dir = os.path.abspath(os.path.join(
        source_root, os.pardir, "out", "x64"))
    fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir, "exe.unstripped")
    return [fuchsia_unstripped_dir, fuchsia_build_dir, zircon_build_dir]


def _align_inlined(prefix, location):
    """Prepend |prefix| and indent every "(inlined" line to the same width."""
    return prefix + location.replace("(inlined", (" " * len(prefix)) + "(inlined")


def main():
    """Read a log line by line and emit symbolized backtraces.

    Input comes from --file (stdin by default) and is echoed unless
    --no-echo is given or the input is a tty. Three kinds of backtrace
    are recognized: user-space "bt#N: pc ... sp ... (dso,offset)" frames,
    kernel frames following "ZIRCON KERNEL PANIC", and ASAN
    "{{{bt:N:addr}}}" frames. Symbolized frames are collected and written
    out when the end of the backtrace or the end of the input is reached.
    """
    global debug_mode, addr2line_tool_path, gdb_tool_path

    args = _parse_args()
    debug_mode = args.debug
    addr2line_tool_path = args.addr2line
    gdb_tool_path = args.gdb
    build_dirs = _resolve_build_dirs(args.build_dir)

    # Parsing vars
    arch = "x86_64"
    bias = 0
    name_to_buildid = {}
    bias_to_name = {}
    processed_lines = []
    prev_sp = None
    prev_frame_num = None
    frame_sizes = []
    total_stack_size = 0
    # If True and we see a dso line, start over collecting the list.
    done_dso_list = True

    # Regex for parsing
    # Either nothing, or something like "[00007.268] 00304.00325> "
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> ?)"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(
        full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(
        full_prefix +
        r"(?:dlsvc: debug: )?dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile(r"^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + r"ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile(r"RIP: (0x[0-9a-z]+)")
    zircon_bt_re = re.compile(full_prefix + r"bt#(\d+): (\dx[0-9a-fA-F]+)$")
    zircon_nm_codestart = re.compile(r'^([a-f0-9]+) t __code_start$', re.M)
    zircon_elf_path = ''
    zircon_code_start = None
    zircon_bt = False
    zircon_pc = ''

    # ASAN backtraces
    asan_bt_re = re.compile(full_prefix + r"\{\{\{bt:(\d+):(0x[0-9a-f]+)\}\}\}")
    asan_bt_end_re = re.compile(full_prefix + r"$")
    asan_bt = False

    # Marker put in front of the disassembly line holding the faulting pc;
    # every other line is padded to the same width so addresses line up.
    current_marker = "=> "
    other_marker = " " * len(current_marker)

    while True:
        line = args.file.readline()
        if args.echo and not args.file.isatty():
            writeline(line)
        end_of_file = (line == '')
        # Strip any trailing carriage return character ("\r"), since
        # these appear in the serial console output from QEMU.
        line = line.rstrip()

        bt_end = bt_end_re.match(line)
        asan_bt_end = asan_bt and asan_bt_end_re.match(line)
        if bt_end or asan_bt_end or end_of_file:
            if processed_lines:
                writeline("\nstart of symbolized stack:\n")
                writelines(processed_lines)
                writeline("end of symbolized stack\n")
                # Since we don't have the last stack frame, leave some leeway
                # when warning about stackoverflow
                if total_stack_size > args.stack_size - 8*1024:
                    if total_stack_size >= args.stack_size:
                        message = "Overflowed stack"
                    else:
                        message = "Potentially overflowed stack"
                    writeline("WARNING: %s (total usage: %d, stack size: %d)\n" %
                              (message, total_stack_size, args.stack_size))
                    for frame, size in frame_sizes:
                        writeline("#%s: %d bytes\n" % (frame, size))
            # Start from a clean slate for the next backtrace.
            processed_lines = []
            frame_sizes = []
            total_stack_size = 0
            prev_sp = None
            zircon_bt = False
            asan_bt = False
            if end_of_file:
                break
            continue

        m = arch_re.match(line)
        if m:
            arch = m.group(2)
            continue

        m = build_id_re.match(line)
        if m:
            if done_dso_list:
                name_to_buildid = {}
                bias_to_name = {}
                done_dso_list = False
            buildid = m.group(2)
            bias = int(m.group(3), 16)
            name = m.group(4)
            name_to_buildid[name] = buildid
            bias_to_name[bias] = name

            if zircon_code_start and zircon_code_start != bias:
                if zircon_code_start < bias:
                    diff = bias - zircon_code_start
                    c = '+'
                else:
                    diff = zircon_code_start - bias
                    c = '-'
                writeline('kaslr offset is %c0x%x\n' % (c, diff))
            continue

        # We didn't see a dso line, so we're done with this list.
        # The next time we see one means we're starting a new list.
        done_dso_list = True

        m = btre.match(line)
        if m and not zircon_bt:
            frame_num = m.group(2)
            m = bt_with_offset_re.match(line)
            if m:
                sp = int(m.group(3), 16)
                if prev_sp is not None:
                    frame_size = sp - prev_sp
                    total_stack_size += frame_size
                    frame_sizes.append((prev_frame_num, frame_size))
                prev_sp = sp
                prev_frame_num = frame_num

                dso = m.group(4)
                off = m.group(5)

                # Adapt offset for KASLR move
                off = kaslr_offset(off, zircon_code_start, bias)

                dso_full_path = find_dso_full_path(
                    dso, args.app, name_to_buildid, build_dirs)
                if dso_full_path:
                    call_loc = get_call_location(arch, dso_full_path, off)
                    if call_loc:
                        processed_lines.append(
                            "#%s: %s" % (frame_num, call_loc))
                        if args.disassemble:
                            pc = int(off, 16)
                            disassembly = run_gdb(
                                arch, "--nx", "--batch", "-ex",
                                "disassemble %#x" % pc, dso_full_path)
                            if disassembly:
                                # Dedicated loop names: the input |line| and
                                # its match |m| must not be clobbered.
                                for disasm_line in disassembly.splitlines():
                                    disasm_match = disasm_re.match(disasm_line)
                                    if disasm_match:
                                        insn_addr, rest = disasm_match.groups()
                                        insn_addr = int(insn_addr, 16)
                                        if insn_addr == pc:
                                            marker = current_marker
                                        else:
                                            marker = other_marker
                                        # Show the run-time address, i.e.
                                        # with the load bias added back.
                                        disasm_line = "%s%#.16x%s" % (
                                            marker, bias + insn_addr, rest)
                                    processed_lines.append(disasm_line + "\n")
                        continue
                else:
                    # can't find dso_full_path
                    processed_lines.append("#%s: unknown, can't find full path for %s\n" %
                                           (frame_num, dso))
            else:
                # bt_with_offset_re failed
                processed_lines.append(
                    "#%s: unknown, can't find pc, sp or app/library in line\n" %
                    (frame_num,))

        # Zircon Specific Handling
        if zircon_crash_re.search(line):
            zircon_elf_path = find_file_in_build_dir("zircon.elf", build_dirs)
            if not zircon_elf_path:
                sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need "
                                 "to build zircon or specify the build directory with -b?\n")
                continue
            zircon_bt = True
            # run_tool() takes the executable path first, so resolve the
            # prebuilt nm for this architecture before calling it.
            nm_result = run_tool(tool_path(arch, "nm", None), zircon_elf_path)
            # A failed run yields False, which must not reach regex search.
            m = zircon_nm_codestart.search(nm_result) if nm_result else None
            if not m:
                sys.stderr.write("Failed to find __code_start from nm\n")
                continue
            zircon_code_start = int(m.group(1), 16)
            continue

        m = zircon_pc_re.search(line)
        if m:
            zircon_pc = kaslr_offset(m.group(1), zircon_code_start, bias)
            continue

        m = zircon_bt_re.match(line)
        if m and zircon_bt:
            frame_num = m.group(2)
            addr = m.group(3)
            # If we saw the instruction pointer for the fault/panic then use it once
            if zircon_pc:
                prefix = " pc: %s => " % zircon_pc
                a2l_out = run_addr2line(arch, zircon_elf_path, zircon_pc)
                # Skip the entry when the tool failed (result is False).
                if a2l_out:
                    processed_lines.append(_align_inlined(prefix, a2l_out))
                zircon_pc = None

            # Adapt offset for KASLR move
            addr = kaslr_offset(addr, zircon_code_start, bias)

            prefix = "bt#%s: %s => " % (frame_num, addr)
            call_loc = get_call_location(arch, zircon_elf_path, addr)
            if call_loc:
                # In the case of inlined methods, it is more readable if the
                # inlined lines are aligned to be to the right of "=>".
                processed_lines.append(_align_inlined(prefix, call_loc))
            continue

        # ASAN Specific Handling
        m = asan_bt_re.match(line)
        if m and not zircon_bt:
            asan_bt = True
            frame_num = m.group(2)
            addr = int(m.group(3), 16)
            # Pick the module with the highest base that is still <= addr.
            # The loop uses its own variable so that |bias| (base of the
            # most recent dso line) is left intact for kaslr_offset().
            offset = None
            dso = None
            for candidate_bias, candidate_dso in bias_to_name.items():
                if addr >= candidate_bias:
                    candidate_offset = addr - candidate_bias
                    if offset is None or candidate_offset < offset:
                        offset = candidate_offset
                        dso = candidate_dso
            if offset is None:
                processed_lines.append("#%s: unknown, can't find DSO for addr 0x%x\n" %
                                       (frame_num, addr))
                continue
            dso_full_path = find_dso_full_path(dso, args.app, name_to_buildid, build_dirs)
            if not dso_full_path:
                processed_lines.append("#%s: unknown, can't find full path for %s\n" %
                                       (frame_num, dso))
                continue

            # Adapt offset for KASLR move
            offset = kaslr_offset(offset, zircon_code_start, bias)

            prefix = "bt#%s: 0x%x => " % (frame_num, addr)
            call_loc = run_addr2line(arch, dso_full_path, "0x%x" % offset)
            if call_loc:
                # In the case of inlined methods, it is more readable if the
                # inlined lines are aligned to be to the right of "=>".
                processed_lines.append(_align_inlined(prefix, call_loc))
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)