| #!/usr/bin/env python |
| |
| # Copyright 2016 The Fuchsia Authors |
| # |
| # Use of this source code is governed by a MIT-style |
| # license that can be found in the LICENSE file or at |
| # https://opensource.org/licenses/MIT |
| |
| """ |
| |
| This tool will symbolize a crash from Zircon's crash logger, adding |
| function names and, if available, source code locations (filenames and |
| line numbers from debug info). |
| |
| Example usage #1: |
| ./scripts/run-zircon -a x64 | ./scripts/symbolize devmgr.elf --build-dir=build-x64 |
| |
| Example usage #2: |
| ./scripts/symbolize devmgr.elf --build-dir=build-x64 |
| <copy and paste output from Zircon> |
| |
| Example usage #3 (for zircon kernel output): |
| ./scripts/symbolize --build-dir=build-x64 |
| <copy and paste output from Zircon> |
| |
| """ |
| |
| import argparse |
| import errno |
| import os |
| import re |
| import subprocess |
| import sys |
| |
# Absolute directory holding this script, and the prebuilt downloads
# directory located at <parent of that directory>/prebuilt/downloads.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
PREBUILTS_BASE_DIR = os.path.abspath(
    os.path.join(SCRIPT_DIR, os.pardir, "prebuilt", "downloads"))
GCC_VERSION = '6.3.0'

# Cache used by find_dso_full_path(): DSO name -> resolved file path.
name_to_full_path = {}
# Overwritten by main() from the --debug flag.
debug_mode = False

# Locations of external tools. main() overwrites these from --addr2line
# and --gdb; while they are None, tool_path() falls back to the prebuilt
# toolchain location.
addr2line_tool_path = None
gdb_tool_path = None
| |
| |
def find_func(find_args, dirname, names):
    """Directory-visit callback that records where a file name was seen.

    find_args is a dict with a "name" key (the entry being searched for)
    and a "path" key (empty string until a match is recorded). When
    find_args["name"] is one of `names`, find_args["path"] is set to
    `dirname`. Nothing is recorded if a match was already stored or if
    `dirname` contains the substring "sysroot".
    """
    already_found = find_args["path"] != ""
    if already_found or "sysroot" in dirname:
        return
    if find_args["name"] in names:
        find_args["path"] = dirname
| |
| |
def find_file_in_build_dir(name, build_dirs):
    """Search the given directory trees for an entry called `name`.

    Each location in `build_dirs` is walked top-down, in order. Any
    directory whose path contains "sysroot" is ignored, together with
    everything below it. Both files and sub-directories count as a match.

    Uses os.walk rather than os.path.walk (which no longer exists in
    Python 3) and stops walking as soon as the first match is found.

    Returns the absolute path of the first match, or None if there is none.
    """
    for location in build_dirs:
        for dirpath, dirnames, filenames in os.walk(location):
            if "sysroot" in dirpath:
                # Every descendant path contains "sysroot" as well, so
                # there is no point in descending any further.
                dirnames[:] = []
                continue
            if name in dirnames or name in filenames:
                return os.path.abspath(os.path.join(dirpath, name))
    return None
| |
| |
def buildid_to_full_path(buildid, build_dirs):
    """Look up a build id in the ids.txt file of each build directory.

    Each useful line of ids.txt is read as "<build-id> <path>". Lines
    that do not have both fields (for example blank lines) are skipped
    instead of raising ValueError, and everything after the first run of
    whitespace is taken as the path, so paths containing spaces work.

    Returns the path recorded for `buildid` in the first ids.txt that
    lists it, or None when no build directory has a matching entry.
    """
    for build_dir in build_dirs:
        id_file_path = os.path.join(build_dir, "ids.txt")
        if not os.path.exists(id_file_path):
            continue
        with open(id_file_path) as id_file:
            for line in id_file:
                fields = line.strip().split(None, 1)
                if len(fields) != 2:
                    continue
                entry_id, path = fields
                if entry_id == buildid:
                    return path
    return None
| |
| |
def find_file_in_boot_manifest(boot_app_name, build_dirs):
    """Map a bootfs output path to its input path via bootfs.manifest.

    The manifest is located with find_file_in_build_dir() and read as
    lines of the form "<out_path>=<in_path>". Lines without an "=" (for
    example blank lines) are skipped rather than raising ValueError, and
    only the first "=" on a line is treated as the separator.

    Returns the input path of the first line whose output path equals
    `boot_app_name`, with a trailing ".strip" suffix removed, or None if
    the manifest cannot be found or has no such entry.
    """
    manifest_path = find_file_in_build_dir("bootfs.manifest", build_dirs)
    if not manifest_path:
        return None
    strip_suffix = ".strip"
    with open(manifest_path) as manifest_file:
        for line in manifest_file:
            out_path, separator, in_path = line.rstrip().partition("=")
            if not separator or out_path != boot_app_name:
                continue
            if in_path.endswith(strip_suffix):
                in_path = in_path[:-len(strip_suffix)]
            return in_path
    return None
| |
| |
def find_dso_full_path_uncached(dso, exe_name, name_to_buildid, build_dirs):
    """Resolve a DSO name from a backtrace to a file in the build dirs.

    Resolution order:
      1. If `dso` has a build id in `name_to_buildid`, look it up in the
         ids.txt files.
      2. If `dso` is "app" or starts with "app:", try `exe_name` (when
         given), then the boot manifest for "app:/boot/..." names, and
         give up if neither works.
      3. Otherwise search the build dirs for `dso` itself, then for
         `exe_name` (when given and `dso` does not end in ".so"), then,
         for absolute paths, for the last path component of `dso`.

    Returns the path that was found, or None.
    """
    if dso in name_to_buildid:
        found_path = buildid_to_full_path(name_to_buildid[dso], build_dirs)
        if found_path:
            return found_path
        # This can be a bug in the generation of the ids.txt file, report it.
        # It's not necessarily a bug though, so for now only report in debug
        # mode.
        if debug_mode:
            print("WARNING: Unable to find %s in any ids.txt file." % dso)

    # The name 'app' indicates the real app name is unknown.
    # If the process has a name property that will be printed, but
    # it has a max of 32 characters so it may be insufficient.
    # Crashlogger prefixes such names with "app:" for our benefit.
    if dso == "app" or dso.startswith("app:"):
        # If an executable was passed on the command-line, try using that.
        if exe_name:
            found_path = find_file_in_build_dir(exe_name, build_dirs)
            if found_path:
                return found_path

        # If this looks like a program in boot fs, consult the manifest.
        boot_prefix = "app:/boot/"
        if dso.startswith(boot_prefix):
            found_path = find_file_in_boot_manifest(
                dso[len(boot_prefix):], build_dirs)
            if found_path:
                return found_path
        return None

    # First, try an exact match for the filename.
    found_path = find_file_in_build_dir(dso, build_dirs)

    # If that fails, and this file doesn't end with .so, try the executable
    # name. Without an executable name there is nothing to search for, so
    # skip the (expensive) directory walk.
    if not found_path and exe_name and not dso.endswith(".so"):
        found_path = find_file_in_build_dir(exe_name, build_dirs)

    # If that still fails and this looks like an absolute path, try the
    # last path component. The separator itself must be dropped: a name
    # that starts with "/" can never equal a directory entry.
    if not found_path and dso.startswith("/"):
        short_name = dso.rsplit("/", 1)[-1]
        if short_name:
            found_path = find_file_in_build_dir(short_name, build_dirs)
    return found_path
| |
| |
def find_dso_full_path(dso, exe_name, name_to_buildid, build_dirs):
    """Cached front end for find_dso_full_path_uncached().

    A previously resolved `dso` is served from the module-level
    name_to_full_path dict. Otherwise the uncached lookup runs and its
    result is stored only when it is truthy, so a failed lookup is
    retried the next time the same name is requested.
    """
    try:
        return name_to_full_path[dso]
    except KeyError:
        pass
    resolved = find_dso_full_path_uncached(
        dso, exe_name, name_to_buildid, build_dirs)
    if resolved:
        name_to_full_path[dso] = resolved
    return resolved
| |
| |
def tool_path(arch, tool, user_provided_path):
    """Return the path of an external toolchain program.

    A `user_provided_path` other than None is returned unchanged.
    Otherwise the path is built as
    "<PREBUILTS_BASE_DIR>/gcc/bin/<arch>-elf-<tool>".

    Raises Exception when no path was provided and sys.platform starts
    with neither "linux" nor "darwin".
    """
    if user_provided_path is not None:
        return user_provided_path
    if not sys.platform.startswith(("linux", "darwin")):
        raise Exception("Unsupported platform!")
    return "%s/gcc/bin/%s-elf-%s" % (PREBUILTS_BASE_DIR, arch, tool)
| |
| |
def run_tool(path, *args):
    """Run the program at `path` with `args` and return what it printed.

    In debug mode the command line is printed before it runs. Returns
    the captured standard output on success. If starting the program or
    the program itself fails, a message is printed and False is returned
    instead of raising.
    """
    cmd = [path]
    cmd.extend(args)
    if debug_mode:
        print("Running: %s" % " ".join(cmd))
    try:
        return subprocess.check_output(cmd)
    except Exception as e:
        print("Calling %s failed: command %s error %s" %
              (os.path.basename(path), cmd, e))
        return False
| |
| |
def run_addr2line(arch, elf_path, addr_as_hex_string):
    """Run addr2line on `elf_path` for a single address.

    addr2line requires hex addresses, and |addr_as_hex_string| must
    already be PIE-adjusted. The addr2line binary is the one given with
    --addr2line if any, else the prebuilt one for `arch`.

    Returns whatever run_tool() returns: the tool's output, or False if
    the invocation failed.
    """
    addr2line = tool_path(arch, "addr2line", addr2line_tool_path)
    lookup_args = ("-Cipfe", elf_path, addr_as_hex_string)
    return run_tool(addr2line, *lookup_args)
| |
| |
def run_gdb(arch, *args):
    """Run gdb with `args` and return run_tool()'s result.

    The gdb binary is the one given with --gdb if any, else the prebuilt
    one for `arch`.
    """
    gdb = tool_path(arch, "gdb", gdb_tool_path)
    return run_tool(gdb, *args)
| |
| |
def get_call_location(arch, elf_path, addr_as_hex_string):
    """Symbolize the call site that belongs to a return address.

    One is subtracted from the return address so that the address handed
    to addr2line lies within the call instruction itself rather than
    after it. This adjustment gives more correct results in the presence
    of inlining and 'noreturn' functions. (See ZX-842.)

    Returns the result of run_addr2line() for the adjusted address.
    """
    return_addr = int(addr_as_hex_string, 16)
    call_site = return_addr - 1
    return run_addr2line(arch, elf_path, "0x%x" % call_site)
| |
| |
def writelines(lines):
    """Write each item of `lines` to stdout via writeline().

    On BSD platforms there are cases where writing to stdout can return
    EAGAIN; writeline() retries the line in that event. This only
    manifests itself when piping qemu's stdout directly to this script.
    """
    for text in lines:
        writeline(text)
| |
| |
def writeline(line):
    """Write `line` to stdout, retrying for as long as it fails with EAGAIN.

    No newline is appended. An IOError whose errno is not EAGAIN ends
    the attempt silently; it is not re-raised.
    """
    finished = False
    while not finished:
        try:
            sys.stdout.write(line)
        except IOError as err:
            # Only EAGAIN is worth another try; anything else is dropped.
            finished = err.errno != errno.EAGAIN
        else:
            finished = True
| |
| |
def kaslr_offset(addr, code_start, bias):
    """Shift `addr` by the difference between `bias` and `code_start`.

    `addr` is returned untouched when either `code_start` or `bias` is
    falsy (None or 0). Otherwise (bias - code_start) is subtracted.

    The result has the same type as the input: a str is parsed as hex
    and returned as lowercase hex digits without a "0x" prefix; any
    other value is used as a number directly.
    """
    if not (code_start and bias):
        return addr
    shift = bias - code_start
    if isinstance(addr, str):
        return '%x' % (int(addr, 16) - shift)
    return addr - shift
| |
| |
def main():
    """Read a crash log and print symbolized backtraces.

    Command-line options select the input (a file, or stdin by default),
    the build directories to search, and optional addr2line/gdb
    overrides. Input lines are echoed (unless --no-echo is given or the
    input is a tty) and scanned for:

      * "arch:" lines, which select the toolchain architecture;
      * "dso:" lines, which record build id and load bias per module;
      * "bt#N: pc ... sp ... (dso,offset)" user backtrace frames;
      * "ZIRCON KERNEL PANIC", "RIP:" and "bt#N: <addr>" kernel lines;
      * "{{{bt:N:addr}}}" ASAN backtrace frames.

    Symbolized frames are collected and written out when the end of a
    backtrace ("bt#N: end", or a blank line for ASAN) or the end of the
    input is reached.

    Returns None.
    """
    global debug_mode, addr2line_tool_path, gdb_tool_path

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--file", "-f", nargs="?", type=argparse.FileType("r"),
                        default=sys.stdin,
                        help="File to read from, stdin by default")
    parser.add_argument("--build-dir", "-b", nargs="*",
                        help="List of build directories to search instead of the default (out/x64)")
    parser.add_argument("--disassemble", "-d", action="store_true",
                        help="Show disassembly of each function")
    parser.add_argument("--stack_size", "-s", type=int,
                        default=256*1024,
                        help="Change the assumed size of the stack (e.g. use 1048576 for ftl or "
                        "mtl default thread size")
    parser.add_argument("--echo", dest="echo", action="store_true",
                        help="Echo lines of input (on by default)")
    parser.add_argument("--no-echo", dest="echo", action="store_false",
                        help="Don't echo lines of input")
    parser.add_argument("--debug", "-D", action="store_true",
                        help="Print messages for debugging symbolize")
    parser.add_argument("--addr2line", default=None,
                        help="Path of addr2line program to use")
    parser.add_argument("--gdb", default=None,
                        help="Path of gdb program to use")
    parser.add_argument("app", nargs="?", help="Name of primary application")
    parser.set_defaults(echo=True)
    args = parser.parse_args()
    debug_mode = args.debug
    addr2line_tool_path = args.addr2line
    gdb_tool_path = args.gdb

    zircon_build_dir = os.path.join(
        os.path.dirname(SCRIPT_DIR), "build-x64")
    if not os.path.exists(zircon_build_dir):
        zircon_build_dir = os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", "build-zircon", "build-x64")
    build_dirs = [zircon_build_dir]
    if args.build_dir:
        build_dirs = args.build_dir + build_dirs
    else:
        # Put the unstripped path ahead of the stripped one, we want the
        # former searched first. This does mean the unstripped directory
        # will get searched twice, but relative to the entire search time,
        # the addition is small.
        # Plus once a file is found its location is cached.
        # Plus this is only used as a fallback in case the file isn't found
        # in ids.txt.
        fuchsia_build_dir = os.path.abspath(os.path.join(
            os.path.dirname(SCRIPT_DIR), os.pardir, "out", "x64"))
        fuchsia_unstripped_dir = os.path.join(fuchsia_build_dir, "exe.unstripped")
        build_dirs = [fuchsia_unstripped_dir, fuchsia_build_dir] + build_dirs

    # Parsing vars
    arch = "x86_64"
    bias = 0  # load bias taken from the most recent dso line
    name_to_buildid = {}
    bias_to_name = {}
    processed_lines = []
    prev_sp = None
    prev_frame_num = None
    frame_sizes = []
    total_stack_size = 0
    # If True and we see a dso line, start over collecting the list.
    done_dso_list = True

    # Regex for parsing (raw strings, so the backslashes reach the regex
    # engine untouched and do not trigger invalid-escape warnings).
    # Either nothing, or something like "[00007.268] 00304.00325> "
    full_prefix = r"^(|\[\d+\.\d+\] \d+\.\d+> ?)"
    btre = re.compile(full_prefix + r"bt#(\d+):")
    bt_with_offset_re = re.compile(
        full_prefix +
        r"bt#(\d+): pc 0x[0-9a-f]+ sp (0x[0-9a-f]+) \(([^,]+),(0x[0-9a-f]+)\)$")
    bt_end_re = re.compile(full_prefix + r"bt#(\d+): end")
    arch_re = re.compile(full_prefix + r"arch: ([\S]+)$")
    build_id_re = re.compile(
        full_prefix +
        r"(?:dlsvc: debug: )?dso: id=([0-9a-z]+) base=(0x[0-9a-f]+) name=(.+)$")
    disasm_re = re.compile(r"^ *(0x[0-9a-f]+)( .+)$")

    # Zircon backtraces
    zircon_crash_re = re.compile(full_prefix + r"ZIRCON KERNEL PANIC$")
    # TODO(cja): Add ARM to the regex
    zircon_pc_re = re.compile(r"RIP: (0x[0-9a-z]+)")
    zircon_bt_re = re.compile(full_prefix +
                              r"bt#(\d+): (\dx[0-9a-fA-F]+)$")
    zircon_nm_codestart = re.compile(r'^([a-f0-9]+) t __code_start$', re.M)
    zircon_elf_path = ''
    zircon_code_start = None
    zircon_bt = False
    zircon_pc = ''

    # ASAN backtraces
    asan_bt_re = re.compile(full_prefix + r"\{\{\{bt:(\d+):(0x[0-9a-f]+)\}\}\}")
    asan_bt_end_re = re.compile(full_prefix + r"$")
    asan_bt = False

    while True:
        line = args.file.readline()
        if args.echo and not args.file.isatty():
            writeline(line)
        end_of_file = (line == '')
        # Strip any trailing carriage return character ("\r"), since
        # these appear in the serial console output from QEMU.
        line = line.rstrip()
        bt_end = bt_end_re.match(line)
        asan_bt_end = asan_bt and asan_bt_end_re.match(line)
        if bt_end or asan_bt_end or end_of_file:
            if processed_lines:
                writeline("\nstart of symbolized stack:\n")
                writelines(processed_lines)
                writeline("end of symbolized stack\n")

                # Since we don't have the last stack frame, leave some
                # leeway (8 KiB) when warning about stack overflow.
                if total_stack_size > args.stack_size - 8*1024:
                    if total_stack_size >= args.stack_size:
                        message = "Overflowed stack"
                    else:
                        message = "Potentially overflowed stack"
                    writeline("WARNING: %s (total usage: %d, stack size: %d)\n" %
                              (message, total_stack_size, args.stack_size))
                    for frame, size in frame_sizes:
                        writeline("#%s: %d bytes\n" % (frame, size))
            # Reset all per-backtrace state so the next backtrace starts
            # clean (including the recorded frame sizes).
            processed_lines = []
            frame_sizes = []
            total_stack_size = 0
            prev_sp = None
            zircon_bt = False
            asan_bt = False
            if end_of_file:
                break
            continue

        m = arch_re.match(line)
        if m:
            arch = m.group(2)
            continue

        m = build_id_re.match(line)
        if m:
            if done_dso_list:
                name_to_buildid = {}
                bias_to_name = {}
                done_dso_list = False
            buildid = m.group(2)
            bias = int(m.group(3), 16)
            name = m.group(4)
            name_to_buildid[name] = buildid
            bias_to_name[bias] = name

            if zircon_code_start and zircon_code_start != bias:
                if zircon_code_start < bias:
                    diff = bias - zircon_code_start
                    c = '+'
                else:
                    diff = zircon_code_start - bias
                    c = '-'
                # writeline() does not add a newline itself.
                writeline('kaslr offset is %c0x%x\n' % (c, diff))
            continue

        # We didn't see a dso line, so we're done with this list.
        # The next time we see one means we're starting a new list.
        done_dso_list = True

        m = btre.match(line)
        if m and not zircon_bt:
            frame_num = m.group(2)
            m = bt_with_offset_re.match(line)
            if m:
                sp = int(m.group(3), 16)
                if prev_sp is not None:
                    frame_size = sp - prev_sp
                    total_stack_size += frame_size
                    frame_sizes.append((prev_frame_num, frame_size))
                prev_sp = sp
                prev_frame_num = frame_num

                dso = m.group(4)
                off = m.group(5)

                # Adapt offset for KASLR move
                off = kaslr_offset(off, zircon_code_start, bias)

                dso_full_path = find_dso_full_path(
                    dso, args.app, name_to_buildid, build_dirs)
                if dso_full_path:
                    call_loc = get_call_location(arch, dso_full_path, off)
                    if call_loc:
                        processed_lines.append(
                            "#%s: %s" % (frame_num, call_loc))
                        if args.disassemble:
                            pc = int(off, 16)
                            disassembly = run_gdb(
                                arch, "--nx", "--batch", "-ex",
                                "disassemble %#x" % pc, dso_full_path)
                            if disassembly:
                                for asm_line in disassembly.splitlines():
                                    asm_match = disasm_re.match(asm_line)
                                    if asm_match:
                                        addr, rest = asm_match.groups()
                                        addr = int(addr, 16)
                                        if addr == pc:
                                            prefix = "=> "
                                        else:
                                            prefix = " "
                                        asm_line = "%s%#.16x%s" % (
                                            prefix, bias + addr, rest)
                                    processed_lines.append(asm_line + "\n")
                    continue
                else:
                    # can't find dso_full_path
                    processed_lines.append("#%s: unknown, can't find full path for %s\n" %
                                           (frame_num, dso))
            else:
                # bt_with_offset_re failed
                processed_lines.append("#%s: unknown, can't find pc, sp or app/library in line\n" %
                                       (frame_num,))

        # Zircon Specific Handling
        if zircon_crash_re.search(line):
            zircon_elf_path = find_file_in_build_dir("zircon.elf", build_dirs)
            if not zircon_elf_path:
                sys.stderr.write("Symbolize could not find the zircon elf binary. Perhaps you need "
                                 "to build zircon or specify the build directory with -b?\n")
                continue
            zircon_bt = True
            # run_tool() takes the executable path first, so resolve the
            # nm binary for this architecture before invoking it.
            nm_result = run_tool(tool_path(arch, "nm", None), zircon_elf_path)
            # run_tool() returns False on failure; don't feed that to re.
            m = zircon_nm_codestart.search(nm_result) if nm_result else None
            if not m:
                sys.stderr.write("Failed to find __code_start from nm\n")
                continue
            zircon_code_start = int(m.group(1), 16)
            continue

        m = zircon_pc_re.search(line)
        if m:
            zircon_pc = kaslr_offset(m.group(1), zircon_code_start, bias)
            continue

        m = zircon_bt_re.match(line)
        if m and zircon_bt:
            frame_num = m.group(2)
            addr = m.group(3)
            # If we saw the instruction pointer for the fault/panic then use it once
            if zircon_pc:
                prefix = " pc: %s => " % zircon_pc
                a2l_out = run_addr2line(arch, zircon_elf_path, zircon_pc)
                # A failed addr2line run yields False; skip the line then.
                if a2l_out:
                    processed_lines.append(
                        prefix +
                        a2l_out.replace("(inlined", (" " * len(prefix)) + "(inlined"))
                zircon_pc = None

            # Adapt offset for KASLR move
            addr = kaslr_offset(addr, zircon_code_start, bias)

            prefix = "bt#%s: %s => " % (frame_num, addr)
            call_loc = get_call_location(arch, zircon_elf_path, addr)
            if call_loc:
                # In the case of inlined methods, it is more readable if the
                # inlined lines are aligned to be to the right of "=>".
                processed_lines.append(
                    prefix +
                    call_loc.replace("(inlined", (" " * len(prefix)) + "(inlined"))
            continue

        # ASAN Specific Handling
        m = asan_bt_re.match(line)
        if m and not zircon_bt:
            asan_bt = True
            frame_num = m.group(2)
            addr = int(m.group(3), 16)
            offset = None
            dso = None
            # Pick the module whose load bias is the closest one at or
            # below the address. A separate loop variable keeps the
            # outer `bias` (from the last dso line) intact.
            for candidate_bias, candidate_dso in bias_to_name.items():
                if addr >= candidate_bias:
                    candidate_offset = addr - candidate_bias
                    if offset is None or candidate_offset < offset:
                        offset = candidate_offset
                        dso = candidate_dso
            if offset is None:
                processed_lines.append("#%s: unknown, can't find DSO for addr 0x%x\n" %
                                       (frame_num, addr))
                continue
            dso_full_path = find_dso_full_path(dso, args.app, name_to_buildid, build_dirs)
            if not dso_full_path:
                processed_lines.append("#%s: unknown, can't find full path for %s\n" %
                                       (frame_num, dso))
                continue

            # Adapt offset for KASLR move
            offset = kaslr_offset(offset, zircon_code_start, bias)

            prefix = "bt#%s: 0x%x => " % (frame_num, addr)
            call_loc = run_addr2line(arch, dso_full_path, "0x%x" % offset)
            if call_loc:
                # In the case of inlined methods, it is more readable if the
                # inlined lines are aligned to be to the right of "=>".
                processed_lines.append(
                    prefix + call_loc.replace("(inlined",
                                              (" " * len(prefix)) + "(inlined"))
| |
# Run the symbolizer when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)