llvm/utils/llvm-original-di-preservation.py - third_party/llvm-project - Git at Google

 #!/usr/bin/env python
 #
 # Debugify summary for the original debug info testing.
 #

 from __future__ import print_function
 import argparse
 import os
 import re
 import sys
 from json import loads
 from collections import defaultdict
 from collections import OrderedDict

 class DILocBug:
     """A DILocation bug reported against a single instruction.

     Stores the optional origin stack trace together with the action, basic
     block name, function name and instruction text of the report.
     """

     def __init__(self, origin, action, bb_name, fn_name, instr):
         self.instr = instr
         self.fn_name = fn_name
         self.bb_name = bb_name
         self.action = action
         self.origin = origin

     def key(self):
         """Return action, block name, function name and instruction concatenated."""
         location = self.action + self.bb_name
         return location + self.fn_name + self.instr

     def reduced_key(self, bug_pass):
         """Return the coarser deduplication key for this bug.

         Without an origin stack trace the key is the pass name followed by the
         instruction. With one, the key is the trace itself with every
         hexadecimal pointer value removed, so that identical traces compare
         equal across files.
         """
         if self.origin is None:
             return bug_pass + self.instr
         return re.sub(r"0x[0-9a-fA-F]+", "", self.origin)

     def to_dict(self):
         """Return the bug as a dict; "origin" is included only when non-empty."""
         fields = dict(
             instr=self.instr,
             fn_name=self.fn_name,
             bb_name=self.bb_name,
             action=self.action,
         )
         if self.origin:
             fields["origin"] = self.origin
         return fields


 class DISPBug:
     """A DISubprogram bug: an action reported against a function."""

     def __init__(self, action, fn_name):
         self.fn_name = fn_name
         self.action = action

     def key(self):
         """Return the action followed by the function name."""
         action, fn_name = self.action, self.fn_name
         return action + fn_name

     def reduced_key(self, bug_pass):
         """Return the coarser deduplication key: pass name plus function name."""
         fn_name = self.fn_name
         return bug_pass + fn_name

     def to_dict(self):
         """Return the bug as a dict with "fn_name" and "action" entries."""
         return dict(fn_name=self.fn_name, action=self.action)


 class DIVarBug:
     """A debug-variable bug: an action reported for a variable in a function."""

     def __init__(self, action, name, fn_name):
         self.fn_name = fn_name
         self.name = name
         self.action = action

     def key(self):
         """Return action, variable name and function name concatenated."""
         variable = self.action + self.name
         return variable + self.fn_name

     def reduced_key(self, bug_pass):
         """Return the coarser deduplication key: pass name plus variable name."""
         name = self.name
         return bug_pass + name

     def to_dict(self):
         """Return the bug as a dict with "fn_name", "name" and "action" entries."""
         return dict(fn_name=self.fn_name, name=self.name, action=self.action)


 def print_bugs_yaml(name, bugs_dict, indent=2):
     """Print the bugs in *bugs_dict* to stdout as a YAML-like listing.

     *bugs_dict* maps file -> pass -> list of bug objects exposing to_dict().
     Files, passes and the fields of each bug are printed in sorted order; the
     first field of each bug carries a "- " list marker, and multi-line values
     are written as "|" block text. *indent* is the number of spaces per level.
     """

     def render(level, text, with_dash=False):
         # Right-justifying "- " places the dash in the margin of the level.
         width = level * indent
         lead = "- ".rjust(width) if with_dash else " " * width
         return lead + text

     print(f"{name}:")
     for bugs_file, per_pass in sorted(bugs_dict.items()):
         print(render(1, f"{bugs_file}:"))
         for bugs_pass, bugs_list in sorted(per_pass.items()):
             print(render(2, f"{bugs_pass}:"))
             for bug in bugs_list:
                 fields = sorted(bug.to_dict().items())
                 for position, (key, val) in enumerate(fields):
                     # Only the first field of a bug gets the '-' in the margin.
                     is_first = position == 0
                     if "\n" not in val:
                         print(render(3, f"{key}: {val}", is_first))
                         continue
                     # Multi-line strings are emitted as block text.
                     print(render(3, f"{key}: |", is_first))
                     for text_line in val.splitlines():
                         print(render(4, text_line))

 # Report the bugs in form of html.
 def generate_html_report(
     di_location_bugs,
     di_subprogram_bugs,
     di_var_bugs,
     di_location_bugs_summary,
     di_sp_bugs_summary,
     di_var_bugs_summary,
     html_file,
 ):
     """Write the collected bugs to *html_file* as an HTML report.

     The three ``*_bugs`` arguments map file -> pass -> list of bug objects; the
     three ``*_summary`` arguments map pass name -> number of bugs. For each bug
     kind (location, subprogram, variable) the page contains a detail table
     followed by a summary table. A table without rows gets a single
     "No bugs found" row spanning all of its columns.

     All values taken from the input are HTML-escaped before being written, so
     IR text such as ``<4 x i32>`` is displayed rather than parsed as markup.
     The file is opened only once the page has been built and is always closed.
     """
     # Local import: keeps this function self-contained.
     from html import escape

     def cell(value):
         # Escaped, whitespace-trimmed text for one <td>.
         return escape(str(value).strip())

     def build_table(title, header, rows):
         # One complete <table>; *rows* holds lists of ready-to-emit cell HTML.
         parts = [
             "<table>\n",
             "  <caption><b>{}</b></caption>\n".format(title),
             "  <tr>\n",
         ]
         parts.extend("    <th>{0}</th>\n".format(col.strip()) for col in header)
         parts.append("  </tr>\n")
         for table_row in rows:
             parts.append("  <tr>\n")
             parts.extend("    <td>{0}</td>\n".format(col) for col in table_row)
             parts.append("  </tr>\n")
         if not rows:
             parts.append("  <tr>\n")
             parts.append(
                 "    <td colspan='{}'> No bugs found </td>\n".format(len(header))
             )
             parts.append("  </tr>\n")
         parts.append("</table>\n")
         return "".join(parts)

     def summary_rows(summary):
         return [[cell(llvm_pass), cell(num)] for llvm_pass, num in sorted(summary.items())]

     def flatten(bugs):
         # Yield (file, pass, bug) for every bug in a file -> pass -> list map.
         for file_name, per_file_bugs in bugs.items():
             for llvm_pass, per_pass_bugs in per_file_bugs.items():
                 for bug in per_pass_bugs:
                     yield file_name, llvm_pass, bug

     summary_header = ["LLVM Pass Name", "Number of bugs"]

     # If any DILocation bug has an origin stack trace, we emit an extra column
     # in the table, which we must therefore determine up-front.
     has_origin_col = any(
         bug.origin is not None for _, _, bug in flatten(di_location_bugs)
     )

     header_di_loc = [
         "File",
         "LLVM Pass Name",
         "LLVM IR Instruction",
         "Function Name",
         "Basic Block Name",
         "Action",
     ]
     if has_origin_col:
         header_di_loc.append("Origin")

     # Location bugs.
     loc_rows = []
     for file_name, llvm_pass, bug in flatten(di_location_bugs):
         table_row = [
             cell(file_name),
             cell(llvm_pass),
             cell(bug.instr),
             cell(bug.fn_name),
             cell(bug.bb_name),
             cell(bug.action),
         ]
         if has_origin_col:
             if bug.origin is not None:
                 # The wrapper markup is ours; only the trace text is escaped.
                 table_row.append(
                     "<details><summary>View Origin StackTrace</summary>"
                     "<pre>{}</pre></details>".format(escape(str(bug.origin)))
                 )
             else:
                 table_row.append("")
         loc_rows.append(table_row)

     # Subprogram (function) bugs.
     sp_rows = [
         [cell(file_name), cell(llvm_pass), cell(bug.fn_name), cell(bug.action)]
         for file_name, llvm_pass, bug in flatten(di_subprogram_bugs)
     ]

     # Variable bugs.
     var_rows = [
         [
             cell(file_name),
             cell(llvm_pass),
             cell(bug.name),
             cell(bug.fn_name),
             cell(bug.action),
         ]
         for file_name, llvm_pass, bug in flatten(di_var_bugs)
     ]

     html_header = "".join(
         [
             "<html>\n",
             "<head>\n",
             "<style>\n",
             "table, th, td {\n",
             "  border: 1px solid black;\n",
             "}\n",
             "table.center {\n",
             "  margin-left: auto;\n",
             "  margin-right: auto;\n",
             "}\n",
             "</style>\n",
             "</head>\n",
             "<body>\n",
         ]
     )
     html_footer = "</body>\n</html>"
     new_line = "<br>\n"

     page = "".join(
         [
             html_header,
             build_table("Location Bugs found by the Debugify", header_di_loc, loc_rows),
             new_line,
             build_table(
                 "Summary of Location Bugs",
                 summary_header,
                 summary_rows(di_location_bugs_summary),
             ),
             new_line,
             new_line,
             build_table(
                 "SP Bugs found by the Debugify",
                 ["File", "LLVM Pass Name", "Function Name", "Action"],
                 sp_rows,
             ),
             new_line,
             build_table(
                 "Summary of SP Bugs", summary_header, summary_rows(di_sp_bugs_summary)
             ),
             new_line,
             new_line,
             build_table(
                 "Variable Location Bugs found by the Debugify",
                 ["File", "LLVM Pass Name", "Variable", "Function", "Action"],
                 var_rows,
             ),
             new_line,
             build_table(
                 "Summary of Variable Location Bugs",
                 summary_header,
                 summary_rows(di_var_bugs_summary),
             ),
             html_footer,
         ]
     )

     with open(html_file, "w") as fileout:
         fileout.write(page)

     print("The " + html_file + " generated.")


 # Read the JSON file in chunks.
 def get_json_chunk(file, start, size):
     """Read one window of a file that holds one JSON object per line.

     Lines are numbered from 1. Lines numbered below *start* are ignored and
     reading stops at the first line numbered ``start + size`` or higher; every
     line in between is parsed with json ``loads``.

     Returns a tuple ``(objects, skipped, line)``: the successfully parsed
     objects, the number of lines in the window that were not valid JSON, and
     the number of the last line read (0 for an empty file).
     """
     di_checker_data = []
     skipped_lines = 0
     line = 0

     # The file contains json object per line.
     # An example of the line (formatted json):
     # {
     #  "file": "simple.c",
     #  "pass": "Deduce function attributes in RPO",
     #  "bugs": [
     #    [
     #      {
     #        "action": "drop",
     #        "metadata": "DISubprogram",
     #        "name": "fn2"
     #      },
     #      {
     #        "action": "drop",
     #        "metadata": "DISubprogram",
     #        "name": "fn1"
     #      }
     #    ]
     #  ]
     # }
     with open(file) as json_objects_file:
         for json_object_line in json_objects_file:
             line += 1
             if line < start:
                 continue
             if line >= start + size:
                 break
             try:
                 json_object = loads(json_object_line)
             except ValueError:
                 # json.JSONDecodeError is a ValueError; anything else (for
                 # example KeyboardInterrupt) must not be swallowed here.
                 skipped_lines += 1
             else:
                 di_checker_data.append(json_object)

     return (di_checker_data, skipped_lines, line)


 # Parse the program arguments.
 def parse_program_args(parser):
     """Register the command-line options on *parser* and parse the command line.

     Declares the positional input file, the optional --reduce switch and a
     required, mutually exclusive choice between --report-html-file and
     --acceptance-test, then returns the result of ``parser.parse_args()``.
     """
     parser.add_argument("file_name", help="json file to process", type=str)

     reduce_help = "create reduced report by deduplicating bugs within and across files"
     parser.add_argument("--reduce", help=reduce_help, action="store_true")

     # Exactly one of the two output modes has to be selected.
     output_mode = parser.add_mutually_exclusive_group(required=True)
     html_help = "output HTML file for the generated report"
     output_mode.add_argument("--report-html-file", help=html_help, type=str)
     acceptance_help = "if set, produce terminal-friendly output and return 0 iff the input file is empty or does not exist"
     output_mode.add_argument(
         "--acceptance-test", help=acceptance_help, action="store_true"
     )

     parsed = parser.parse_args()
     return parsed


 def _record_bug(bug, bugs_pass, reduce, seen_keys, reduced_keys, bucket, summary):
     """Add *bug* to *bucket* unless it is a duplicate, and count it in *summary*.

     *seen_keys* holds the full keys already stored for the current file/pass
     pair. *reduced_keys* holds the coarser keys shared across all files and is
     consulted only when *reduce* is set. The per-pass *summary* counter is
     incremented for every report, whether or not it was stored.
     """
     key = bug.key()
     if key not in seen_keys:
         seen_keys.add(key)
         if reduce:
             reduced_key = bug.reduced_key(bugs_pass)
             if reduced_key not in reduced_keys:
                 reduced_keys.add(reduced_key)
                 bucket.append(bug)
         else:
             bucket.append(bug)
     summary[bugs_pass] = summary.get(bugs_pass, 0) + 1


 def Main():
     """Parse the command line, collect the bugs from the input and report them.

     The input file is read in chunks of one million lines. Each parsed line
     contributes its bugs to the per-file, per-pass collections, with duplicates
     omitted. Depending on the selected mode, the bugs are then written to an
     HTML report or printed to the terminal; in acceptance-test mode the process
     exits with status 1 when any bug was collected.
     """
     parser = argparse.ArgumentParser()
     opts = parse_program_args(parser)

     if opts.report_html_file is not None and not opts.report_html_file.endswith(
         ".html"
     ):
         print("error: The output file must be '.html'.")
         sys.exit(1)

     if opts.acceptance_test:
         if os.path.isdir(opts.file_name):
             print(f"error: Directory passed as input file: '{opts.file_name}'")
             sys.exit(1)
         if not os.path.exists(opts.file_name):
             # We treat an empty input file as a success, as debugify will generate an output file iff any errors are
             # found, meaning we expect 0 errors to mean that the expected file does not exist.
             print(f"No errors detected for: {opts.file_name}")
             sys.exit(0)
     elif os.path.isdir(opts.file_name) or not os.path.exists(opts.file_name):
         # Report an unusable input up-front instead of failing inside open().
         print(f"error: Cannot read input file: '{opts.file_name}'")
         sys.exit(1)

     # Exceptions raised when a JSON value does not have the expected shape.
     malformed = (KeyError, IndexError, TypeError, AttributeError)

     # Use the defaultdict in order to make multidim dicts.
     di_location_bugs = defaultdict(lambda: defaultdict(list))
     di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
     di_variable_bugs = defaultdict(lambda: defaultdict(list))

     # Use the ordered dict to make a summary.
     di_location_bugs_summary = OrderedDict()
     di_sp_bugs_summary = OrderedDict()
     di_var_bugs_summary = OrderedDict()

     # If we are using --reduce, use these sets to deduplicate similar bugs within and across files.
     di_loc_reduced_set = set()
     di_sp_reduced_set = set()
     di_var_reduced_set = set()

     start_line = 0
     chunk_size = 1000000
     end_line = chunk_size - 1
     skipped_lines = 0
     skipped_bugs = 0
     # Process each chunk of 1 million JSON lines.
     while start_line <= end_line:
         (debug_info_bugs, skipped, end_line) = get_json_chunk(
             opts.file_name, start_line, chunk_size
         )
         start_line += chunk_size
         skipped_lines += skipped

         # Map the bugs into the file-pass pairs.
         for bugs_per_pass in debug_info_bugs:
             try:
                 bugs_file = bugs_per_pass["file"]
                 bugs_pass = bugs_per_pass["pass"]
                 bugs = bugs_per_pass["bugs"][0]
             except malformed:
                 skipped_lines += 1
                 continue

             # Continue from the bugs already collected for this file-pass pair
             # (the same pair can appear on several input lines). .get() is used
             # so that pairs without bugs do not create empty entries.
             di_loc_bugs = di_location_bugs.get(bugs_file, {}).get(bugs_pass, [])
             di_sp_bugs = di_subprogram_bugs.get(bugs_file, {}).get(bugs_pass, [])
             di_var_bugs = di_variable_bugs.get(bugs_file, {}).get(bugs_pass, [])

             # Omit duplicated bugs, including ones stored by an earlier line.
             di_loc_set = {known.key() for known in di_loc_bugs}
             di_sp_set = {known.key() for known in di_sp_bugs}
             di_var_set = {known.key() for known in di_var_bugs}
             for bug in bugs:
                 try:
                     bugs_metadata = bug["metadata"]
                 except malformed:
                     skipped_bugs += 1
                     continue

                 if bugs_metadata == "DILocation":
                     try:
                         di_loc_bug = DILocBug(
                             bug.get("origin"),
                             bug["action"],
                             bug["bb-name"],
                             bug["fn-name"],
                             bug["instr"],
                         )
                     except malformed:
                         skipped_bugs += 1
                         continue
                     _record_bug(
                         di_loc_bug,
                         bugs_pass,
                         opts.reduce,
                         di_loc_set,
                         di_loc_reduced_set,
                         di_loc_bugs,
                         di_location_bugs_summary,
                     )
                 elif bugs_metadata == "DISubprogram":
                     try:
                         di_sp_bug = DISPBug(bug["action"], bug["name"])
                     except malformed:
                         skipped_bugs += 1
                         continue
                     _record_bug(
                         di_sp_bug,
                         bugs_pass,
                         opts.reduce,
                         di_sp_set,
                         di_sp_reduced_set,
                         di_sp_bugs,
                         di_sp_bugs_summary,
                     )
                 elif bugs_metadata == "dbg-var-intrinsic":
                     try:
                         di_var_bug = DIVarBug(
                             bug["action"], bug["name"], bug["fn-name"]
                         )
                     except malformed:
                         skipped_bugs += 1
                         continue
                     _record_bug(
                         di_var_bug,
                         bugs_pass,
                         opts.reduce,
                         di_var_set,
                         di_var_reduced_set,
                         di_var_bugs,
                         di_var_bugs_summary,
                     )
                 else:
                     # Unsupported metadata.
                     skipped_bugs += 1
                     continue

             if di_loc_bugs:
                 di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
             if di_sp_bugs:
                 di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
             if di_var_bugs:
                 di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs

     if opts.report_html_file is not None:
         generate_html_report(
             di_location_bugs,
             di_subprogram_bugs,
             di_variable_bugs,
             di_location_bugs_summary,
             di_sp_bugs_summary,
             di_var_bugs_summary,
             opts.report_html_file,
         )
     else:
         # Pretty(ish) print the detected bugs, but check if any exist first so that we don't print an empty dict.
         if di_location_bugs:
             print_bugs_yaml("DILocation Bugs", di_location_bugs)
         if di_subprogram_bugs:
             print_bugs_yaml("DISubprogram Bugs", di_subprogram_bugs)
         if di_variable_bugs:
             print_bugs_yaml("DIVariable Bugs", di_variable_bugs)

     if opts.acceptance_test:
         if any((di_location_bugs, di_subprogram_bugs, di_variable_bugs)):
             # Add a newline gap after printing at least one error.
             print()
             print(f"Errors detected for: {opts.file_name}")
             sys.exit(1)
         else:
             print(f"No errors detected for: {opts.file_name}")

     if skipped_lines > 0:
         print("Skipped lines: " + str(skipped_lines))
     if skipped_bugs > 0:
         print("Skipped bugs: " + str(skipped_bugs))


 # Script entry point: run Main() and, if it returns, exit with status 0.
 if __name__ == "__main__":
     Main()
     sys.exit(0)
	#!/usr/bin/env python
	#
	# Debugify summary for the original debug info testing.
	#

	from __future__ import print_function
	import argparse
	import os
	import re
	import sys
	from json import loads
	from collections import defaultdict
	from collections import OrderedDict

	class DILocBug:
	    """A DILocation bug reported against a single instruction.

	    Stores the optional origin stack trace together with the action, basic
	    block name, function name and instruction text of the report.
	    """

	    def __init__(self, origin, action, bb_name, fn_name, instr):
	        self.origin = origin
	        self.action = action
	        self.bb_name = bb_name
	        self.fn_name = fn_name
	        self.instr = instr

	    def key(self):
	        """Return action, block name, function name and instruction concatenated."""
	        return self.action + self.bb_name + self.fn_name + self.instr

	    def reduced_key(self, bug_pass):
	        """Return the coarser deduplication key for this bug."""
	        if self.origin is not None:
	            # If we have the origin stacktrace available, we can use it to efficiently deduplicate identical errors. We
	            # just need to remove the pointer values from the string first, so that we can deduplicate across files.
	            origin_no_addr = re.sub(r"0x[0-9a-fA-F]+", "", self.origin)
	            return origin_no_addr
	        return bug_pass + self.instr

	    def to_dict(self):
	        """Return the bug as a dict; "origin" is included only when non-empty."""
	        result = {
	            "instr": self.instr,
	            "fn_name": self.fn_name,
	            "bb_name": self.bb_name,
	            "action": self.action,
	        }
	        if self.origin:
	            result["origin"] = self.origin
	        return result


	class DISPBug:
	    """A DISubprogram bug: an action reported against a function."""

	    def __init__(self, action, fn_name):
	        self.action = action
	        self.fn_name = fn_name

	    def key(self):
	        """Return the action followed by the function name."""
	        return self.action + self.fn_name

	    def reduced_key(self, bug_pass):
	        """Return the coarser deduplication key: pass name plus function name."""
	        return bug_pass + self.fn_name

	    def to_dict(self):
	        """Return the bug as a dict with "fn_name" and "action" entries."""
	        return {
	            "fn_name": self.fn_name,
	            "action": self.action,
	        }


	class DIVarBug:
	    """A debug-variable bug: an action reported for a variable in a function."""

	    def __init__(self, action, name, fn_name):
	        self.action = action
	        self.name = name
	        self.fn_name = fn_name

	    def key(self):
	        """Return action, variable name and function name concatenated."""
	        return self.action + self.name + self.fn_name

	    def reduced_key(self, bug_pass):
	        """Return the coarser deduplication key: pass name plus variable name."""
	        return bug_pass + self.name

	    def to_dict(self):
	        """Return the bug as a dict with "fn_name", "name" and "action" entries."""
	        return {
	            "fn_name": self.fn_name,
	            "name": self.name,
	            "action": self.action,
	        }


	def print_bugs_yaml(name, bugs_dict, indent=2):
	    """Print the bugs in *bugs_dict* to stdout as a YAML-like listing.

	    *bugs_dict* maps file -> pass -> list of bug objects exposing to_dict().
	    Files, passes and the fields of each bug are printed in sorted order; the
	    first field of each bug carries a "- " list marker, and multi-line values
	    are written as "|" block text. *indent* is the number of spaces per level.
	    """

	    def get_bug_line(indent_level: int, text: str, margin_mark: bool = False):
	        if margin_mark:
	            return "- ".rjust(indent_level * indent) + text
	        return " " * indent * indent_level + text

	    print(f"{name}:")
	    for bugs_file, bugs_pass_dict in sorted(bugs_dict.items()):
	        print(get_bug_line(1, f"{bugs_file}:"))
	        for bugs_pass, bugs_list in sorted(bugs_pass_dict.items()):
	            print(get_bug_line(2, f"{bugs_pass}:"))
	            for bug in bugs_list:
	                bug_dict = bug.to_dict()
	                first_line = True
	                # First item needs a '-' in the margin.
	                for key, val in sorted(bug_dict.items()):
	                    if "\n" in val:
	                        # Output block text for any multiline string. The
	                        # block indicator is a bare '|', with no backslash.
	                        print(get_bug_line(3, f"{key}: |", first_line))
	                        for line in val.splitlines():
	                            print(get_bug_line(4, line))
	                    else:
	                        print(get_bug_line(3, f"{key}: {val}", first_line))
	                    first_line = False

	# Report the bugs in form of html.
	def generate_html_report(
	di_location_bugs,
	di_subprogram_bugs,
	di_var_bugs,
	di_location_bugs_summary,
	di_sp_bugs_summary,
	di_var_bugs_summary,
	html_file,
	):
	fileout = open(html_file, "w")

	html_header = """ <html>
	<head>
	<style>
	table, th, td {
	border: 1px solid black;
	}
	table.center {
	margin-left: auto;
	margin-right: auto;
	}
	</style>
	</head>
	<body>
	"""

	# Create the table for Location bugs.
	table_title_di_loc = "Location Bugs found by the Debugify"

	table_di_loc = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_loc
	)

	# If any DILocation bug has an origin stack trace, we emit an extra column in the table, which we must therefore
	# determine up-front.
	has_origin_col = any(
	x.origin is not None
	for per_file_bugs in di_location_bugs.values()
	for per_pass_bugs in per_file_bugs.values()
	for x in per_pass_bugs
	)

	header_di_loc = [
	"File",
	"LLVM Pass Name",
	"LLVM IR Instruction",
	"Function Name",
	"Basic Block Name",
	"Action",
	]
	if has_origin_col:
	header_di_loc.append("Origin")

	for column in header_di_loc:
	table_di_loc += " <th>{0}</th>\n".format(column.strip())
	table_di_loc += " </tr>\n"

	at_least_one_bug_found = False

	# Handle loction bugs.
	for file, per_file_bugs in di_location_bugs.items():
	for llvm_pass, per_pass_bugs in per_file_bugs.items():
	# No location bugs for the pass.
	if len(per_pass_bugs) == 0:
	continue
	at_least_one_bug_found = True
	row = []
	table_di_loc += " </tr>\n"
	# Get the bugs info.
	for x in per_pass_bugs:
	row.append(" <tr>\n")
	row.append(file)
	row.append(llvm_pass)
	row.append(x.instr)
	row.append(x.fn_name)
	row.append(x.bb_name)
	row.append(x.action)
	if has_origin_col:
	if x.origin is not None:
	row.append(
	f"<details><summary>View Origin StackTrace</summary><pre>{x.origin}</pre></details>"
	)
	else:
	row.append("")
	row.append(" </tr>\n")
	# Dump the bugs info into the table.
	for column in row:
	# The same file-pass pair can have multiple bugs.
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_loc += column
	continue
	table_di_loc += " <td>{0}</td>\n".format(column.strip())
	table_di_loc += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_loc += """ <tr>
	<td colspan='7'> No bugs found </td>
	</tr>
	"""
	table_di_loc += "</table>\n"

	# Create the summary table for the loc bugs.
	table_title_di_loc_sum = "Summary of Location Bugs"
	table_di_loc_sum = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_loc_sum
	)

	header_di_loc_sum = ["LLVM Pass Name", "Number of bugs"]

	for column in header_di_loc_sum:
	table_di_loc_sum += " <th>{0}</th>\n".format(column.strip())
	table_di_loc_sum += " </tr>\n"

	# Print the summary.
	row = []
	for llvm_pass, num in sorted(di_location_bugs_summary.items()):
	row.append(" <tr>\n")
	row.append(llvm_pass)
	row.append(str(num))
	row.append(" </tr>\n")
	for column in row:
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_loc_sum += column
	continue
	table_di_loc_sum += " <td>{0}</td>\n".format(column.strip())
	table_di_loc_sum += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_loc_sum += """<tr>
	<td colspan='2'> No bugs found </td>
	</tr>
	"""
	table_di_loc_sum += "</table>\n"

	# Create the table for SP bugs.
	table_title_di_sp = "SP Bugs found by the Debugify"
	table_di_sp = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_sp
	)

	header_di_sp = ["File", "LLVM Pass Name", "Function Name", "Action"]

	for column in header_di_sp:
	table_di_sp += " <th>{0}</th>\n".format(column.strip())
	table_di_sp += " </tr>\n"

	at_least_one_bug_found = False

	# Handle fn bugs.
	for file, per_file_bugs in di_subprogram_bugs.items():
	for llvm_pass, per_pass_bugs in per_file_bugs.items():
	# No SP bugs for the pass.
	if len(per_pass_bugs) == 0:
	continue
	at_least_one_bug_found = True
	row = []
	table_di_sp += " </tr>\n"
	# Get the bugs info.
	for x in per_pass_bugs:
	row.append(" <tr>\n")
	row.append(file)
	row.append(llvm_pass)
	row.append(x.fn_name)
	row.append(x.action)
	row.append(" </tr>\n")
	# Dump the bugs info into the table.
	for column in row:
	# The same file-pass pair can have multiple bugs.
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_sp += column
	continue
	table_di_sp += " <td>{0}</td>\n".format(column.strip())
	table_di_sp += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_sp += """<tr>
	<td colspan='4'> No bugs found </td>
	</tr>
	"""
	table_di_sp += "</table>\n"

	# Create the summary table for the sp bugs.
	table_title_di_sp_sum = "Summary of SP Bugs"
	table_di_sp_sum = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_sp_sum
	)

	header_di_sp_sum = ["LLVM Pass Name", "Number of bugs"]

	for column in header_di_sp_sum:
	table_di_sp_sum += " <th>{0}</th>\n".format(column.strip())
	table_di_sp_sum += " </tr>\n"

	# Print the summary.
	row = []
	for llvm_pass, num in sorted(di_sp_bugs_summary.items()):
	row.append(" <tr>\n")
	row.append(llvm_pass)
	row.append(str(num))
	row.append(" </tr>\n")
	for column in row:
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_sp_sum += column
	continue
	table_di_sp_sum += " <td>{0}</td>\n".format(column.strip())
	table_di_sp_sum += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_sp_sum += """<tr>
	<td colspan='2'> No bugs found </td>
	</tr>
	"""
	table_di_sp_sum += "</table>\n"

	# Create the table for Variable bugs.
	table_title_di_var = "Variable Location Bugs found by the Debugify"
	table_di_var = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_var
	)

	header_di_var = ["File", "LLVM Pass Name", "Variable", "Function", "Action"]

	for column in header_di_var:
	table_di_var += " <th>{0}</th>\n".format(column.strip())
	table_di_var += " </tr>\n"

	at_least_one_bug_found = False

	# Handle var bugs.
	for file, per_file_bugs in di_var_bugs.items():
	for llvm_pass, per_pass_bugs in per_file_bugs.items():
	# No variable bugs for the pass.
	if len(per_pass_bugs) == 0:
	continue
	at_least_one_bug_found = True
	row = []
	table_di_var += " </tr>\n"
	# Get the bugs info.
	for x in per_pass_bugs:
	row.append(" <tr>\n")
	row.append(file)
	row.append(llvm_pass)
	row.append(x.name)
	row.append(x.fn_name)
	row.append(x.action)
	row.append(" </tr>\n")
	# Dump the bugs info into the table.
	for column in row:
	# The same file-pass pair can have multiple bugs.
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_var += column
	continue
	table_di_var += " <td>{0}</td>\n".format(column.strip())
	table_di_var += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_var += """<tr>
	<td colspan='4'> No bugs found </td>
	</tr>
	"""
	table_di_var += "</table>\n"

	# Create the summary table for the variable bugs.
	table_title_di_var_sum = "Summary of Variable Location Bugs"
	table_di_var_sum = """<table>
	<caption><b>{}</b></caption>
	<tr>
	""".format(
	table_title_di_var_sum
	)

	header_di_var_sum = ["LLVM Pass Name", "Number of bugs"]

	for column in header_di_var_sum:
	table_di_var_sum += " <th>{0}</th>\n".format(column.strip())
	table_di_var_sum += " </tr>\n"

	# Print the summary.
	row = []
	for llvm_pass, num in sorted(di_var_bugs_summary.items()):
	row.append(" <tr>\n")
	row.append(llvm_pass)
	row.append(str(num))
	row.append(" </tr>\n")
	for column in row:
	if column == " <tr>\n" or column == " </tr>\n":
	table_di_var_sum += column
	continue
	table_di_var_sum += " <td>{0}</td>\n".format(column.strip())
	table_di_var_sum += " <tr>\n"

	if not at_least_one_bug_found:
	table_di_var_sum += """<tr>
	<td colspan='2'> No bugs found </td>
	</tr>
	"""
	table_di_var_sum += "</table>\n"

	# Finish the html page.
	html_footer = """</body>
	</html>"""

	new_line = "<br>\n"

	fileout.writelines(html_header)
	fileout.writelines(table_di_loc)
	fileout.writelines(new_line)
	fileout.writelines(table_di_loc_sum)
	fileout.writelines(new_line)
	fileout.writelines(new_line)
	fileout.writelines(table_di_sp)
	fileout.writelines(new_line)
	fileout.writelines(table_di_sp_sum)
	fileout.writelines(new_line)
	fileout.writelines(new_line)
	fileout.writelines(table_di_var)
	fileout.writelines(new_line)
	fileout.writelines(table_di_var_sum)
	fileout.writelines(html_footer)
	fileout.close()

	print("The " + html_file + " generated.")


	# Read the JSON file in chunks.
	def get_json_chunk(file, start, size):
	    """Parse one window of lines from a JSON-lines file.

	    Lines are numbered from 1. A line is parsed when
	    ``start <= line_number < start + size``; the file is always scanned from
	    its beginning, and scanning stops at the first line past the window.

	    Args:
	        file: path of the file to read (one JSON object per line).
	        start: first line number that belongs to the window.
	        size: width of the window, in lines.

	    Returns:
	        A tuple ``(objects, skipped_lines, line)`` where ``objects`` is the
	        list of successfully parsed JSON values, ``skipped_lines`` counts the
	        lines inside the window that were not valid JSON, and ``line`` is the
	        number of the last line read (0 for an empty file).
	    """
	    di_checker_data = []
	    skipped_lines = 0
	    # Stays 0 when the file has no lines at all.
	    line = 0

	    # The file contains json object per line.
	    # An example of the line (formatted json):
	    # {
	    #   "file": "simple.c",
	    #   "pass": "Deduce function attributes in RPO",
	    #   "bugs": [
	    #     [
	    #       {
	    #         "action": "drop",
	    #         "metadata": "DISubprogram",
	    #         "name": "fn2"
	    #       },
	    #       {
	    #         "action": "drop",
	    #         "metadata": "DISubprogram",
	    #         "name": "fn1"
	    #       }
	    #     ]
	    #   ]
	    # }
	    with open(file) as json_objects_file:
	        for line, json_object_line in enumerate(json_objects_file, start=1):
	            if line < start:
	                continue
	            if line >= start + size:
	                break
	            try:
	                json_object = loads(json_object_line)
	            except (ValueError, RecursionError):
	                # Malformed (or absurdly nested) JSON: count the line and
	                # move on. Unlike a bare `except`, this lets Ctrl-C and
	                # SystemExit propagate.
	                skipped_lines += 1
	            else:
	                di_checker_data.append(json_object)

	    return (di_checker_data, skipped_lines, line)


	# Parse the program arguments.
	def parse_program_args(parser):
	    """Register the tool's options on `parser` and parse the command line.

	    The positional `file_name` and the optional `--reduce` flag are always
	    accepted. Exactly one of `--report-html-file` and `--acceptance-test`
	    must be supplied; argparse rejects the command line otherwise.

	    Returns the namespace produced by `parser.parse_args()`.
	    """
	    parser.add_argument("file_name", help="json file to process", type=str)
	    parser.add_argument(
	        "--reduce",
	        help="create reduced report by deduplicating bugs within and across files",
	        action="store_true",
	    )

	    # The two output modes, in the order they appear in the usage text.
	    output_modes = (
	        (
	            "--report-html-file",
	            {"type": str, "help": "output HTML file for the generated report"},
	        ),
	        (
	            "--acceptance-test",
	            {
	                "action": "store_true",
	                "help": "if set, produce terminal-friendly output and return 0 iff the input file is empty or does not exist",
	            },
	        ),
	    )
	    mode_group = parser.add_mutually_exclusive_group(required=True)
	    for flag, settings in output_modes:
	        mode_group.add_argument(flag, **settings)

	    return parser.parse_args()


	def _record_bug(bug, bugs_pass, reduce, seen_keys, reduced_keys, bugs, summary):
	    """Store `bug` in `bugs` unless it is a duplicate, then count it.

	    bug          -- bug object providing key() and reduced_key(pass_name).
	    bugs_pass    -- name of the pass the bug was reported for.
	    reduce       -- True when --reduce was requested.
	    seen_keys    -- set of key() values already seen in the current JSON object.
	    reduced_keys -- set of reduced_key() values seen so far in the whole run;
	                    only consulted when `reduce` is True.
	    bugs         -- list for the current file-pass pair; appended to in place.
	    summary      -- per-pass counter dict; incremented for every bug passed
	                    in, including ones dropped as duplicates.
	    """
	    key = bug.key()
	    if key not in seen_keys:
	        seen_keys.add(key)
	        if reduce:
	            reduced_key = bug.reduced_key(bugs_pass)
	            if reduced_key not in reduced_keys:
	                reduced_keys.add(reduced_key)
	                bugs.append(bug)
	        else:
	            bugs.append(bug)

	    # Fill the summary dict.
	    summary[bugs_pass] = summary.get(bugs_pass, 0) + 1


	def Main():
	    """Read the debugify JSON report and emit the requested output.

	    The input file is processed in chunks of one million lines. Each JSON
	    object names a file, a pass and a list of bugs; the bugs are sorted into
	    DILocation, DISubprogram and variable buckets keyed by file and pass.
	    Depending on the options, the result is written as an HTML report or
	    printed to the terminal (acceptance-test mode, which exits with status 1
	    when any bug was collected). Malformed lines and bugs are counted and
	    reported at the end instead of aborting the run.
	    """
	    parser = argparse.ArgumentParser()
	    opts = parse_program_args(parser)

	    if opts.report_html_file is not None and not opts.report_html_file.endswith(
	        ".html"
	    ):
	        print("error: The output file must be '.html'.")
	        sys.exit(1)

	    if opts.acceptance_test:
	        if os.path.isdir(opts.file_name):
	            print(f"error: Directory passed as input file: '{opts.file_name}'")
	            sys.exit(1)
	        if not os.path.exists(opts.file_name):
	            # We treat a missing input file as a success, as debugify will generate an output file iff any errors
	            # are found, meaning we expect 0 errors to mean that the expected file does not exist.
	            print(f"No errors detected for: {opts.file_name}")
	            sys.exit(0)

	    # Use the defaultdict in order to make multidim dicts.
	    di_location_bugs = defaultdict(lambda: defaultdict(list))
	    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
	    di_variable_bugs = defaultdict(lambda: defaultdict(list))

	    # Use the ordered dict to make a summary.
	    di_location_bugs_summary = OrderedDict()
	    di_sp_bugs_summary = OrderedDict()
	    di_var_bugs_summary = OrderedDict()

	    # If we are using --reduce, use these sets to deduplicate similar bugs within and across files.
	    di_loc_reduced_set = set()
	    di_sp_reduced_set = set()
	    di_var_reduced_set = set()

	    start_line = 0
	    chunk_size = 1000000
	    end_line = chunk_size - 1
	    skipped_lines = 0
	    skipped_bugs = 0
	    # Process each chunk of 1 million JSON lines. get_json_chunk() reports the
	    # last line it read, so the loop ends once the next chunk would start
	    # beyond that line.
	    while start_line <= end_line:
	        (debug_info_bugs, skipped, end_line) = get_json_chunk(
	            opts.file_name, start_line, chunk_size
	        )
	        start_line += chunk_size
	        skipped_lines += skipped

	        # Map the bugs into the file-pass pairs.
	        for bugs_per_pass in debug_info_bugs:
	            try:
	                bugs_file = bugs_per_pass["file"]
	                bugs_pass = bugs_per_pass["pass"]
	                bugs = bugs_per_pass["bugs"][0]
	                # Continue the lists already collected for this file-pass
	                # pair so that a pair spread over several JSON lines
	                # accumulates instead of being overwritten. `.get` is used
	                # so that the defaultdicts do not grow empty entries. The
	                # lookups stay inside the `try` because an unhashable
	                # "file"/"pass" value raises TypeError here.
	                di_loc_bugs = di_location_bugs.get(bugs_file, {}).get(bugs_pass, [])
	                di_sp_bugs = di_subprogram_bugs.get(bugs_file, {}).get(bugs_pass, [])
	                di_var_bugs = di_variable_bugs.get(bugs_file, {}).get(bugs_pass, [])
	            except (KeyError, IndexError, TypeError):
	                skipped_lines += 1
	                continue

	            # Omit duplicated bugs (within this JSON object).
	            di_loc_set = set()
	            di_sp_set = set()
	            di_var_set = set()
	            for bug in bugs:
	                try:
	                    bugs_metadata = bug["metadata"]
	                except (KeyError, TypeError):
	                    skipped_bugs += 1
	                    continue

	                if bugs_metadata == "DILocation":
	                    try:
	                        # The origin stacktrace is optional.
	                        origin = bug.get("origin")
	                        action = bug["action"]
	                        bb_name = bug["bb-name"]
	                        fn_name = bug["fn-name"]
	                        instr = bug["instr"]
	                    except KeyError:
	                        skipped_bugs += 1
	                        continue
	                    _record_bug(
	                        DILocBug(origin, action, bb_name, fn_name, instr),
	                        bugs_pass,
	                        opts.reduce,
	                        di_loc_set,
	                        di_loc_reduced_set,
	                        di_loc_bugs,
	                        di_location_bugs_summary,
	                    )
	                elif bugs_metadata == "DISubprogram":
	                    try:
	                        action = bug["action"]
	                        name = bug["name"]
	                    except KeyError:
	                        skipped_bugs += 1
	                        continue
	                    _record_bug(
	                        DISPBug(action, name),
	                        bugs_pass,
	                        opts.reduce,
	                        di_sp_set,
	                        di_sp_reduced_set,
	                        di_sp_bugs,
	                        di_sp_bugs_summary,
	                    )
	                elif bugs_metadata == "dbg-var-intrinsic":
	                    try:
	                        action = bug["action"]
	                        fn_name = bug["fn-name"]
	                        name = bug["name"]
	                    except KeyError:
	                        skipped_bugs += 1
	                        continue
	                    _record_bug(
	                        DIVarBug(action, name, fn_name),
	                        bugs_pass,
	                        opts.reduce,
	                        di_var_set,
	                        di_var_reduced_set,
	                        di_var_bugs,
	                        di_var_bugs_summary,
	                    )
	                else:
	                    # Unsupported metadata.
	                    skipped_bugs += 1
	                    continue

	            # Only store non-empty lists so that the dicts stay falsy when no
	            # bug was collected.
	            if di_loc_bugs:
	                di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs
	            if di_sp_bugs:
	                di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs
	            if di_var_bugs:
	                di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs

	    if opts.report_html_file is not None:
	        generate_html_report(
	            di_location_bugs,
	            di_subprogram_bugs,
	            di_variable_bugs,
	            di_location_bugs_summary,
	            di_sp_bugs_summary,
	            di_var_bugs_summary,
	            opts.report_html_file,
	        )
	    else:
	        # Pretty(ish) print the detected bugs, but check if any exist first so that we don't print an empty dict.
	        if di_location_bugs:
	            print_bugs_yaml("DILocation Bugs", di_location_bugs)
	        if di_subprogram_bugs:
	            print_bugs_yaml("DISubprogram Bugs", di_subprogram_bugs)
	        if di_variable_bugs:
	            print_bugs_yaml("DIVariable Bugs", di_variable_bugs)

	    if opts.acceptance_test:
	        if any((di_location_bugs, di_subprogram_bugs, di_variable_bugs)):
	            # Add a newline gap after printing at least one error.
	            print()
	            print(f"Errors detected for: {opts.file_name}")
	            sys.exit(1)
	        else:
	            print(f"No errors detected for: {opts.file_name}")

	    if skipped_lines > 0:
	        print("Skipped lines: " + str(skipped_lines))
	    if skipped_bugs > 0:
	        print("Skipped bugs: " + str(skipped_bugs))


	# Script entry point: run the tool, then terminate with a success status.
	# Main() itself exits early with a different status on errors.
	if __name__ == "__main__":
	    Main()
	    raise SystemExit(0)