| #!/usr/bin/env python |
| # |
| # Debugify summary for the original debug info testing. |
| # |
| |
from __future__ import print_function

import argparse
import html
import os
import re
import sys
from collections import OrderedDict
from collections import defaultdict
from json import loads
| |
class DILocBug:
    """A debug-location bug record for a single instruction.

    Attributes:
        origin: Optional origin stack trace text, or None when not available.
        action: The action string reported for the bug.
        bb_name: Name of the basic block containing the instruction.
        fn_name: Name of the function containing the instruction.
        instr: The instruction text reported for the bug.
    """

    def __init__(self, origin, action, bb_name, fn_name, instr):
        self.origin = origin
        self.action = action
        self.bb_name = bb_name
        self.fn_name = fn_name
        self.instr = instr

    def key(self):
        """Return a string identifying this bug, used to drop exact duplicates.

        The fields are joined with a NUL separator rather than concatenated directly, so that two
        different bugs whose fields merely concatenate to the same text (e.g. "ab" + "c" and
        "a" + "bc") do not produce the same key.
        """
        return "\0".join((self.action, self.bb_name, self.fn_name, self.instr))

    def reduced_key(self, bug_pass):
        """Return a coarser key used to deduplicate similar bugs within and across files.

        When an origin stack trace is present, the key is the trace with hexadecimal pointer values
        removed, so identical traces from different runs compare equal. Otherwise the key is built
        from the pass name and the instruction.
        """
        if self.origin is not None:
            # Pointer values differ between files/runs, so strip them before comparing traces.
            return re.sub(r"0x[0-9a-fA-F]+", "", self.origin)
        # NUL-separated for the same reason as in key(): avoid accidental collisions.
        return "\0".join((bug_pass, self.instr))

    def to_dict(self):
        """Return the bug as a dict of strings; "origin" is included only when it is non-empty."""
        result = {
            "instr": self.instr,
            "fn_name": self.fn_name,
            "bb_name": self.bb_name,
            "action": self.action,
        }
        if self.origin:
            result["origin"] = self.origin
        return result
| |
| |
class DISPBug:
    """A subprogram (function-level) debug-info bug record.

    Attributes:
        action: The action string reported for the bug.
        fn_name: Name of the affected function.
    """

    def __init__(self, action, fn_name):
        self.action = action
        self.fn_name = fn_name

    def key(self):
        """Return a string identifying this bug, used to drop exact duplicates.

        The fields are NUL-separated so that different (action, fn_name) pairs can never
        concatenate to the same key.
        """
        return "\0".join((self.action, self.fn_name))

    def reduced_key(self, bug_pass):
        """Return a coarser key (pass name + function name) for deduplicating across files."""
        return "\0".join((bug_pass, self.fn_name))

    def to_dict(self):
        """Return the bug as a dict of strings."""
        return {
            "fn_name": self.fn_name,
            "action": self.action,
        }
| |
| |
class DIVarBug:
    """A variable debug-info bug record.

    Attributes:
        action: The action string reported for the bug.
        name: Name of the affected variable.
        fn_name: Name of the function the variable belongs to.
    """

    def __init__(self, action, name, fn_name):
        self.action = action
        self.name = name
        self.fn_name = fn_name

    def key(self):
        """Return a string identifying this bug, used to drop exact duplicates.

        The fields are NUL-separated so that different (action, name, fn_name) triples can never
        concatenate to the same key.
        """
        return "\0".join((self.action, self.name, self.fn_name))

    def reduced_key(self, bug_pass):
        """Return a coarser key (pass name + variable name) for deduplicating across files."""
        return "\0".join((bug_pass, self.name))

    def to_dict(self):
        """Return the bug as a dict of strings."""
        return {
            "fn_name": self.fn_name,
            "name": self.name,
            "action": self.action,
        }
| |
| |
def print_bugs_yaml(name, bugs_dict, indent=2):
    """Print bugs to stdout as a YAML-like nested listing.

    Args:
        name: Heading printed on the first line, followed by a colon.
        bugs_dict: Mapping of file name -> mapping of pass name -> list of bug objects. Each bug
            object must provide to_dict() returning a dict of string keys to string values.
        indent: Number of spaces per nesting level.

    Files, passes and the fields of each bug are printed in sorted key order. Each bug starts with
    a "- " marker on its first field, and multi-line values are printed as "|" block text.
    """

    def get_bug_line(indent_level: int, text: str, margin_mark: bool = False):
        width = indent_level * indent
        # A marked line puts "- " right-aligned in the margin so the text stays in the same column.
        prefix = "- ".rjust(width) if margin_mark else " " * width
        return prefix + text

    print(f"{name}:")
    for bugs_file in sorted(bugs_dict):
        print(get_bug_line(1, f"{bugs_file}:"))
        per_pass = bugs_dict[bugs_file]
        for bugs_pass in sorted(per_pass):
            print(get_bug_line(2, f"{bugs_pass}:"))
            for bug in per_pass[bugs_pass]:
                fields = bug.to_dict()
                for position, key in enumerate(sorted(fields)):
                    val = fields[key]
                    # Only the first field of a bug carries the '-' list marker.
                    is_first = position == 0
                    if "\n" not in val:
                        print(get_bug_line(3, f"{key}: {val}", is_first))
                        continue
                    # Multi-line strings are emitted as block text, one level deeper.
                    print(get_bug_line(3, f"{key}: |", is_first))
                    for line in val.splitlines():
                        print(get_bug_line(4, line))
| |
def _html_text(value):
    """Return *value* as stripped, HTML-escaped text that is safe to place inside a table cell."""
    return html.escape(str(value).strip(), quote=False)


def _html_table(title, headers, rows):
    """Render one report table as an HTML string.

    Args:
        title: Caption text (a trusted literal, emitted as-is).
        headers: Column header texts (trusted literals).
        rows: List of rows; each row is a list of cell contents that are already valid HTML.

    When *rows* is empty, a single "No bugs found" row spanning every column is emitted instead.
    """
    parts = ["<table>\n<caption><b>{}</b></caption>\n<tr>\n".format(title)]
    parts.extend(" <th>{0}</th>\n".format(header.strip()) for header in headers)
    parts.append(" </tr>\n")
    for cells in rows:
        parts.append(" <tr>\n")
        parts.extend(" <td>{0}</td>\n".format(cell) for cell in cells)
        parts.append(" </tr>\n")
    if not rows:
        parts.append(
            "<tr>\n<td colspan='{}'> No bugs found </td>\n</tr>\n".format(len(headers))
        )
    parts.append("</table>\n")
    return "".join(parts)


def _html_detail_rows(bugs_by_file, bug_cells):
    """Build the rows of a per-bug table: file and pass name followed by bug_cells(bug)."""
    rows = []
    for file_name, per_file_bugs in bugs_by_file.items():
        for llvm_pass, per_pass_bugs in per_file_bugs.items():
            # The same file-pass pair can have multiple bugs; each one gets its own row.
            for bug in per_pass_bugs:
                rows.append(
                    [_html_text(file_name), _html_text(llvm_pass)] + bug_cells(bug)
                )
    return rows


def _html_summary_rows(summary):
    """Build the rows of a summary table (pass name, bug count), sorted by pass name."""
    return [
        [_html_text(llvm_pass), _html_text(num)]
        for llvm_pass, num in sorted(summary.items())
    ]


# Report the bugs in form of html.
def generate_html_report(
    di_location_bugs,
    di_subprogram_bugs,
    di_var_bugs,
    di_location_bugs_summary,
    di_sp_bugs_summary,
    di_var_bugs_summary,
    html_file,
):
    """Write the HTML bug report to *html_file* and print a confirmation line.

    Args:
        di_location_bugs: file -> pass -> list of location bugs (objects with instr, fn_name,
            bb_name, action and origin attributes).
        di_subprogram_bugs: file -> pass -> list of subprogram bugs (fn_name, action).
        di_var_bugs: file -> pass -> list of variable bugs (name, fn_name, action).
        di_location_bugs_summary: pass name -> number of location bugs.
        di_sp_bugs_summary: pass name -> number of subprogram bugs.
        di_var_bugs_summary: pass name -> number of variable bugs.
        html_file: Path of the HTML file to create (overwritten if it exists).

    The report contains, for each of the three bug kinds, a detailed table followed by a per-pass
    summary table. All text taken from the bug data is HTML-escaped, and a table with no rows shows
    a single "No bugs found" row.
    """
    html_header = """ <html>
<head>
<style>
table, th, td {
border: 1px solid black;
}
table.center {
margin-left: auto;
margin-right: auto;
}
</style>
</head>
<body>
"""

    # If any DILocation bug has an origin stack trace, we emit an extra column in the table, which
    # we must therefore determine up-front.
    has_origin_col = any(
        bug.origin is not None
        for per_file_bugs in di_location_bugs.values()
        for per_pass_bugs in per_file_bugs.values()
        for bug in per_pass_bugs
    )

    header_di_loc = [
        "File",
        "LLVM Pass Name",
        "LLVM IR Instruction",
        "Function Name",
        "Basic Block Name",
        "Action",
    ]
    if has_origin_col:
        header_di_loc.append("Origin")

    def loc_cells(bug):
        cells = [
            _html_text(bug.instr),
            _html_text(bug.fn_name),
            _html_text(bug.bb_name),
            _html_text(bug.action),
        ]
        if has_origin_col:
            if bug.origin is not None:
                # Stack traces contain '<', '>' and '&' (e.g. C++ templates), so escape them
                # before embedding the trace in the collapsible <pre> block.
                cells.append(
                    "<details><summary>View Origin StackTrace</summary><pre>{}</pre></details>".format(
                        html.escape(bug.origin, quote=False)
                    )
                )
            else:
                cells.append("")
        return cells

    def sp_cells(bug):
        return [_html_text(bug.fn_name), _html_text(bug.action)]

    def var_cells(bug):
        return [_html_text(bug.name), _html_text(bug.fn_name), _html_text(bug.action)]

    summary_headers = ["LLVM Pass Name", "Number of bugs"]

    # Create the tables for Location bugs.
    table_di_loc = _html_table(
        "Location Bugs found by the Debugify",
        header_di_loc,
        _html_detail_rows(di_location_bugs, loc_cells),
    )
    table_di_loc_sum = _html_table(
        "Summary of Location Bugs",
        summary_headers,
        _html_summary_rows(di_location_bugs_summary),
    )

    # Create the tables for SP bugs.
    table_di_sp = _html_table(
        "SP Bugs found by the Debugify",
        ["File", "LLVM Pass Name", "Function Name", "Action"],
        _html_detail_rows(di_subprogram_bugs, sp_cells),
    )
    table_di_sp_sum = _html_table(
        "Summary of SP Bugs",
        summary_headers,
        _html_summary_rows(di_sp_bugs_summary),
    )

    # Create the tables for Variable bugs.
    table_di_var = _html_table(
        "Variable Location Bugs found by the Debugify",
        ["File", "LLVM Pass Name", "Variable", "Function", "Action"],
        _html_detail_rows(di_var_bugs, var_cells),
    )
    table_di_var_sum = _html_table(
        "Summary of Variable Location Bugs",
        summary_headers,
        _html_summary_rows(di_var_bugs_summary),
    )

    # Finish the html page.
    html_footer = """</body>
</html>"""

    new_line = "<br>\n"

    document = "".join(
        [
            html_header,
            table_di_loc,
            new_line,
            table_di_loc_sum,
            new_line,
            new_line,
            table_di_sp,
            new_line,
            table_di_sp_sum,
            new_line,
            new_line,
            table_di_var,
            new_line,
            table_di_var_sum,
            html_footer,
        ]
    )

    # The whole page is built before the file is opened, and the context manager guarantees the
    # file is closed even if the write fails.
    with open(html_file, "w") as fileout:
        fileout.write(document)

    print("The " + html_file + " generated.")
| |
| |
# Read the JSON file in chunks.
def get_json_chunk(file, start, size):
    """Parse one window of lines from a file holding one JSON object per line.

    Args:
        file: Path of the file to read.
        start: First line number (counting from 1) to parse; earlier lines are passed over.
        size: Window length; reading stops at line number start + size, which is not parsed.

    Returns:
        A tuple (objects, skipped_lines, line) where objects is the list of successfully parsed
        JSON values, skipped_lines is the number of lines in the window that were not valid JSON,
        and line is the number of the last line read (0 for an empty file).
    """
    di_checker_data = []
    skipped_lines = 0
    line = 0
    stop = start + size

    # The file contains json object per line.
    # An example of the line (formatted json):
    # {
    #  "file": "simple.c",
    #  "pass": "Deduce function attributes in RPO",
    #  "bugs": [
    #    [
    #      {
    #         "action": "drop",
    #         "metadata": "DISubprogram",
    #         "name": "fn2"
    #      },
    #      {
    #         "action": "drop",
    #         "metadata": "DISubprogram",
    #         "name": "fn1"
    #      }
    #    ]
    #  ]
    # }
    with open(file) as json_objects_file:
        for line, json_object_line in enumerate(json_objects_file, start=1):
            if line < start:
                continue
            if line >= stop:
                break
            try:
                json_object = loads(json_object_line)
            except (ValueError, RecursionError):
                # Malformed (or pathologically nested) line: count it and carry on. A bare
                # "except:" here would also swallow KeyboardInterrupt and SystemExit.
                skipped_lines += 1
            else:
                di_checker_data.append(json_object)

    return (di_checker_data, skipped_lines, line)
| |
| |
# Parse the program arguments.
def parse_program_args(parser):
    """Register this tool's command-line options on *parser* and parse the command line.

    Options registered: the positional input file, the optional --reduce flag, and a required,
    mutually exclusive choice between --report-html-file and --acceptance-test.

    Returns:
        The namespace produced by parser.parse_args().
    """
    general_options = (
        (("file_name",), {"type": str, "help": "json file to process"}),
        (
            ("--reduce",),
            {
                "action": "store_true",
                "help": "create reduced report by deduplicating bugs within and across files",
            },
        ),
    )
    for flags, settings in general_options:
        parser.add_argument(*flags, **settings)

    # Exactly one kind of report must be requested.
    output_mode = parser.add_mutually_exclusive_group(required=True)
    output_mode_options = (
        (
            ("--report-html-file",),
            {"type": str, "help": "output HTML file for the generated report"},
        ),
        (
            ("--acceptance-test",),
            {
                "action": "store_true",
                "help": "if set, produce terminal-friendly output and return 0 iff the input file is empty or does not exist",
            },
        ),
    )
    for flags, settings in output_mode_options:
        output_mode.add_argument(*flags, **settings)

    parsed = parser.parse_args()
    return parsed
| |
| |
def _record_bug(bug, bugs_pass, reduce, bugs_list, seen_keys, reduced_keys, summary):
    """Record *bug* for the pass *bugs_pass* unless it duplicates one already seen.

    A bug whose key() is already in *seen_keys* is ignored entirely. Otherwise it is counted in
    *summary* and appended to *bugs_list*; when *reduce* is set it is appended only if its
    reduced_key() has not been seen yet in *reduced_keys*.
    """
    key = bug.key()
    if key in seen_keys:
        return
    seen_keys.add(key)

    if reduce:
        reduced_key = bug.reduced_key(bugs_pass)
        if reduced_key not in reduced_keys:
            reduced_keys.add(reduced_key)
            bugs_list.append(bug)
    else:
        bugs_list.append(bug)

    # Fill the summary dict.
    summary[bugs_pass] = summary.get(bugs_pass, 0) + 1


def Main():
    """Entry point: read the debugify JSON file, collect the bugs and emit the requested report.

    Exits with status 1 when the HTML output name does not end in '.html', when --acceptance-test
    is given a directory, or when --acceptance-test finds any bug. Exits with status 0 when
    --acceptance-test is given a path that does not exist. Otherwise returns normally after
    writing the HTML report or printing the bugs, followed by the counts of skipped lines/bugs.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if opts.report_html_file is not None and not opts.report_html_file.endswith(
        ".html"
    ):
        print("error: The output file must be '.html'.")
        sys.exit(1)

    if opts.acceptance_test:
        if os.path.isdir(opts.file_name):
            print(f"error: Directory passed as input file: '{opts.file_name}'")
            sys.exit(1)
        if not os.path.exists(opts.file_name):
            # We treat a missing input file as a success, as debugify will generate an output file
            # iff any errors are found, meaning we expect 0 errors to mean that the expected file
            # does not exist.
            print(f"No errors detected for: {opts.file_name}")
            sys.exit(0)

    # Use the defaultdict in order to make multidim dicts.
    di_location_bugs = defaultdict(lambda: defaultdict(list))
    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
    di_variable_bugs = defaultdict(lambda: defaultdict(list))

    # Use the ordered dict to make a summary.
    di_location_bugs_summary = OrderedDict()
    di_sp_bugs_summary = OrderedDict()
    di_var_bugs_summary = OrderedDict()

    # If we are using --reduce, use these sets to deduplicate similar bugs within and across files.
    di_loc_reduced_set = set()
    di_sp_reduced_set = set()
    di_var_reduced_set = set()

    start_line = 0
    chunk_size = 1000000
    end_line = chunk_size - 1
    skipped_lines = 0
    skipped_bugs = 0
    # Process each chunk of 1 million JSON lines. get_json_chunk reports the last line it read, so
    # once the next chunk would start past that line the whole file has been consumed.
    while start_line <= end_line:
        (debug_info_bugs, skipped, end_line) = get_json_chunk(
            opts.file_name, start_line, chunk_size
        )
        start_line += chunk_size
        skipped_lines += skipped

        # Map the bugs into the file-pass pairs.
        for bugs_per_pass in debug_info_bugs:
            try:
                bugs_file = bugs_per_pass["file"]
                bugs_pass = bugs_per_pass["pass"]
                bugs = bugs_per_pass["bugs"][0]
            except (KeyError, IndexError, TypeError):
                skipped_lines += 1
                continue
            if not isinstance(bugs_file, str) or not isinstance(bugs_pass, str):
                # File and pass names are used as dict keys and printed as text below.
                skipped_lines += 1
                continue

            # Continue the lists already recorded for this file-pass pair: the same pair can show
            # up on several JSON lines, and starting from an empty list would overwrite the bugs
            # collected earlier. Using .get() avoids creating empty defaultdict entries.
            di_loc_bugs = di_location_bugs.get(bugs_file, {}).get(bugs_pass, [])
            di_sp_bugs = di_subprogram_bugs.get(bugs_file, {}).get(bugs_pass, [])
            di_var_bugs = di_variable_bugs.get(bugs_file, {}).get(bugs_pass, [])

            # Omit duplicated bugs (within this JSON line).
            di_loc_set = set()
            di_sp_set = set()
            di_var_set = set()
            for bug in bugs:
                try:
                    bugs_metadata = bug["metadata"]
                    if bugs_metadata == "DILocation":
                        new_bug = DILocBug(
                            bug.get("origin"),
                            bug["action"],
                            bug["bb-name"],
                            bug["fn-name"],
                            bug["instr"],
                        )
                        sink = (
                            di_loc_bugs,
                            di_loc_set,
                            di_loc_reduced_set,
                            di_location_bugs_summary,
                        )
                    elif bugs_metadata == "DISubprogram":
                        new_bug = DISPBug(bug["action"], bug["name"])
                        sink = (
                            di_sp_bugs,
                            di_sp_set,
                            di_sp_reduced_set,
                            di_sp_bugs_summary,
                        )
                    elif bugs_metadata == "dbg-var-intrinsic":
                        new_bug = DIVarBug(bug["action"], bug["name"], bug["fn-name"])
                        sink = (
                            di_var_bugs,
                            di_var_set,
                            di_var_reduced_set,
                            di_var_bugs_summary,
                        )
                    else:
                        # Unsupported metadata.
                        skipped_bugs += 1
                        continue
                except (KeyError, IndexError, TypeError):
                    # The bug entry is not a dict or lacks a required field.
                    skipped_bugs += 1
                    continue

                _record_bug(new_bug, bugs_pass, opts.reduce, *sink)

            if di_loc_bugs:
                di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
            if di_sp_bugs:
                di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
            if di_var_bugs:
                di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs

    if opts.report_html_file is not None:
        generate_html_report(
            di_location_bugs,
            di_subprogram_bugs,
            di_variable_bugs,
            di_location_bugs_summary,
            di_sp_bugs_summary,
            di_var_bugs_summary,
            opts.report_html_file,
        )
    else:
        # Pretty(ish) print the detected bugs, but check if any exist first so that we don't print
        # an empty dict.
        if di_location_bugs:
            print_bugs_yaml("DILocation Bugs", di_location_bugs)
        if di_subprogram_bugs:
            print_bugs_yaml("DISubprogram Bugs", di_subprogram_bugs)
        if di_variable_bugs:
            print_bugs_yaml("DIVariable Bugs", di_variable_bugs)

    if opts.acceptance_test:
        if any((di_location_bugs, di_subprogram_bugs, di_variable_bugs)):
            # Add a newline gap after printing at least one error.
            print()
            print(f"Errors detected for: {opts.file_name}")
            sys.exit(1)
        else:
            print(f"No errors detected for: {opts.file_name}")

    if skipped_lines > 0:
        print("Skipped lines: " + str(skipped_lines))
    if skipped_bugs > 0:
        print("Skipped bugs: " + str(skipped_bugs))
| |
| |
if __name__ == "__main__":
    # Main() calls sys.exit() itself on the paths that need a specific status; reaching the line
    # after it means the run finished normally, so report success.
    Main()
    raise SystemExit(0)