scripts/oss-fuzz/minimize_qtest_trace.py - third_party/qemu - Git at Google

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 """
 This takes a crashing qtest trace and tries to remove superfluous operations
 """

 import sys
 import os
 import subprocess
 import time
 import struct

 QEMU_ARGS = None
 QEMU_PATH = None
 TIMEOUT = 5
 CRASH_TOKEN = None

 # Minimization levels
 M1 = False # try removing IO commands iteratively
 M2 = False # try setting bits in operand of write/out to zero

 write_suffix_lookup = {"b": (1, "B"),
                        "w": (2, "H"),
                        "l": (4, "L"),
                        "q": (8, "Q")}

 def usage():
     sys.exit("""\
 Usage:

 QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace

 By default, will try to use the second-to-last line in the output to identify
 whether the crash occred. Optionally, manually set a string that idenitifes the
 crash by setting CRASH_TOKEN=

 Options:

 -M1: enable a loop around the remove minimizer, which may help decrease some
      timing dependent instructions. Off by default.
 -M2: try setting bits in operand of write/out to zero. Off by default.

 """.format((sys.argv[0])))

 deduplication_note = """\n\
 Note: While trimming the input, sometimes the mutated trace triggers a different
 type crash but indicates the same bug. Under this situation, our minimizer is
 incapable of recognizing and stopped from removing it. In the future, we may
 use a more sophisticated crash case deduplication method.
 \n"""

 def check_if_trace_crashes(trace, path):
     with open(path, "w") as tracefile:
         tracefile.write("".join(trace))

     rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
     < {trace_path}".format(timeout=TIMEOUT,
                            qemu_path=QEMU_PATH,
                            qemu_args=QEMU_ARGS,
                            trace_path=path),
                           shell=True,
                           stdin=subprocess.PIPE,
                           stdout=subprocess.PIPE,
                           encoding="utf-8")
     global CRASH_TOKEN
     if CRASH_TOKEN is None:
         try:
             outs, _ = rc.communicate(timeout=5)
             CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
         except subprocess.TimeoutExpired:
             print("subprocess.TimeoutExpired")
             return False
         print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
         global deduplication_note
         print(deduplication_note)
         return True

     for line in iter(rc.stdout.readline, ""):
         if "CLOSED" in line:
             return False
         if CRASH_TOKEN in line:
             return True

     print("\nWarning:")
     print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
     print("  Usually this indicates a different type of crash.\n")
     return False


 # If previous write commands write the same length of data at the same
 # interval, we view it as a hint.
 def split_write_hint(newtrace, i):
     HINT_LEN = 3 # > 2
     if i <=(HINT_LEN-1):
         return None

     #find previous continuous write traces
     k = 0
     l = i-1
     writes = []
     while (k != HINT_LEN and l >= 0):
         if newtrace[l].startswith("write "):
             writes.append(newtrace[l])
             k += 1
             l -= 1
         elif newtrace[l] == "":
             l -= 1
         else:
             return None
     if k != HINT_LEN:
         return None

     length = int(writes[0].split()[2], 16)
     for j in range(1, HINT_LEN):
         if length != int(writes[j].split()[2], 16):
             return None

     step = int(writes[0].split()[1], 16) - int(writes[1].split()[1], 16)
     for j in range(1, HINT_LEN-1):
         if step != int(writes[j].split()[1], 16) - \
             int(writes[j+1].split()[1], 16):
             return None

     return (int(writes[0].split()[1], 16)+step, length)


 def remove_lines(newtrace, outpath):
     remove_step = 1
     i = 0
     while i < len(newtrace):
         # 1.) Try to remove lines completely and reproduce the crash.
         # If it works, we're done.
         if (i+remove_step) >= len(newtrace):
             remove_step = 1
         prior = newtrace[i:i+remove_step]
         for j in range(i, i+remove_step):
             newtrace[j] = ""
         print("Removing {lines} ...\n".format(lines=prior))
         if check_if_trace_crashes(newtrace, outpath):
             i += remove_step
             # Double the number of lines to remove for next round
             remove_step *= 2
             continue
         # Failed to remove multiple IOs, fast recovery
         if remove_step > 1:
             for j in range(i, i+remove_step):
                 newtrace[j] = prior[j-i]
             remove_step = 1
             continue
         newtrace[i] = prior[0] # remove_step = 1

         # 2.) Try to replace write{bwlq} commands with a write addr, len
         # command. Since this can require swapping endianness, try both LE and
         # BE options. We do this, so we can "trim" the writes in (3)

         if (newtrace[i].startswith("write") and not
             newtrace[i].startswith("write ")):
             suffix = newtrace[i].split()[0][-1]
             assert(suffix in write_suffix_lookup)
             addr = int(newtrace[i].split()[1], 16)
             value = int(newtrace[i].split()[2], 16)
             for endianness in ['<', '>']:
                 data = struct.pack("{end}{size}".format(end=endianness,
                                    size=write_suffix_lookup[suffix][1]),
                                    value)
                 newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                     addr=hex(addr),
                     size=hex(write_suffix_lookup[suffix][0]),
                     data=data.hex())
                 if(check_if_trace_crashes(newtrace, outpath)):
                     break
             else:
                 newtrace[i] = prior[0]

         # 3.) If it is a qtest write command: write addr len data, try to split
         # it into two separate write commands. If splitting the data operand
         # from length/2^n bytes to the left does not work, try to move the pivot
         # to the right side, then add one to n, until length/2^n == 0. The idea
         # is to prune unnecessary bytes from long writes, while accommodating
         # arbitrary MemoryRegion access sizes and alignments.

         # This algorithm will fail under some rare situations.
         # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)

         if newtrace[i].startswith("write "):
             addr = int(newtrace[i].split()[1], 16)
             length = int(newtrace[i].split()[2], 16)
             data = newtrace[i].split()[3][2:]
             if length > 1:

                 # Can we get a hint from previous writes?
                 hint = split_write_hint(newtrace, i)
                 if hint is not None:
                     hint_addr = hint[0]
                     hint_len = hint[1]
                     if hint_addr >= addr and hint_addr+hint_len <= addr+length:
                         newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                             addr=hex(hint_addr),
                             size=hex(hint_len),
                             data=data[(hint_addr-addr)*2:\
                                 (hint_addr-addr)*2+hint_len*2])
                         if check_if_trace_crashes(newtrace, outpath):
                             # next round
                             i += 1
                             continue
                         newtrace[i] = prior[0]

                 # Try splitting it using a binary approach
                 leftlength = int(length/2)
                 rightlength = length - leftlength
                 newtrace.insert(i+1, "")
                 power = 1
                 while leftlength > 0:
                     newtrace[i] = "write {addr} {size} 0x{data}\n".format(
                             addr=hex(addr),
                             size=hex(leftlength),
                             data=data[:leftlength*2])
                     newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
                             addr=hex(addr+leftlength),
                             size=hex(rightlength),
                             data=data[leftlength*2:])
                     if check_if_trace_crashes(newtrace, outpath):
                         break
                     # move the pivot to right side
                     if leftlength < rightlength:
                         rightlength, leftlength = leftlength, rightlength
                         continue
                     power += 1
                     leftlength = int(length/pow(2, power))
                     rightlength = length - leftlength
                 if check_if_trace_crashes(newtrace, outpath):
                     i -= 1
                 else:
                     newtrace[i] = prior[0]
                     del newtrace[i+1]
         i += 1


 def clear_bits(newtrace, outpath):
     # try setting bits in operands of out/write to zero
     i = 0
     while i < len(newtrace):
         if (not newtrace[i].startswith("write ") and not
            newtrace[i].startswith("out")):
            i += 1
            continue
         # write ADDR SIZE DATA
         # outx ADDR VALUE
         print("\nzero setting bits: {}".format(newtrace[i]))

         prefix = " ".join(newtrace[i].split()[:-1])
         data = newtrace[i].split()[-1]
         data_bin = bin(int(data, 16))
         data_bin_list = list(data_bin)

         for j in range(2, len(data_bin_list)):
             prior = newtrace[i]
             if (data_bin_list[j] == '1'):
                 data_bin_list[j] = '0'
                 data_try = hex(int("".join(data_bin_list), 2))
                 # It seems qtest only accepts padded hex-values.
                 if len(data_try) % 2 == 1:
                     data_try = data_try[:2] + "0" + data_try[2:]

                 newtrace[i] = "{prefix} {data_try}\n".format(
                         prefix=prefix,
                         data_try=data_try)

                 if not check_if_trace_crashes(newtrace, outpath):
                     data_bin_list[j] = '1'
                     newtrace[i] = prior
         i += 1


 def minimize_trace(inpath, outpath):
     global TIMEOUT
     with open(inpath) as f:
         trace = f.readlines()
     start = time.time()
     if not check_if_trace_crashes(trace, outpath):
         sys.exit("The input qtest trace didn't cause a crash...")
     end = time.time()
     print("Crashed in {} seconds".format(end-start))
     TIMEOUT = (end-start)*5
     print("Setting the timeout for {} seconds".format(TIMEOUT))

     newtrace = trace[:]
     global M1, M2

     # remove lines
     old_len = len(newtrace) + 1
     while(old_len > len(newtrace)):
         old_len = len(newtrace)
         print("trace length = ", old_len)
         remove_lines(newtrace, outpath)
         if not M1 and not M2:
             break
         newtrace = list(filter(lambda s: s != "", newtrace))
     assert(check_if_trace_crashes(newtrace, outpath))

     # set bits to zero
     if M2:
         clear_bits(newtrace, outpath)
     assert(check_if_trace_crashes(newtrace, outpath))


 if __name__ == '__main__':
     if len(sys.argv) < 3:
         usage()
     if "-M1" in sys.argv:
         M1 = True
     if "-M2" in sys.argv:
         M2 = True
     QEMU_PATH = os.getenv("QEMU_PATH")
     QEMU_ARGS = os.getenv("QEMU_ARGS")
     if QEMU_PATH is None or QEMU_ARGS is None:
         usage()
     # if "accel" not in QEMU_ARGS:
     #     QEMU_ARGS += " -accel qtest"
     CRASH_TOKEN = os.getenv("CRASH_TOKEN")
     QEMU_ARGS += " -qtest stdio -monitor none -serial none "
     minimize_trace(sys.argv[-2], sys.argv[-1])
	#!/usr/bin/env python3
	# -- coding: utf-8 --

	"""
	This takes a crashing qtest trace and tries to remove superfluous operations
	"""

	import sys
	import os
	import subprocess
	import time
	import struct

	QEMU_ARGS = None
	QEMU_PATH = None
	TIMEOUT = 5
	CRASH_TOKEN = None

	# Minimization levels
	M1 = False # try removing IO commands iteratively
	M2 = False # try setting bits in operand of write/out to zero

	write_suffix_lookup = {"b": (1, "B"),
	"w": (2, "H"),
	"l": (4, "L"),
	"q": (8, "Q")}

	def usage():
	sys.exit("""\
	Usage:

	QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace

	By default, will try to use the second-to-last line in the output to identify
	whether the crash occred. Optionally, manually set a string that idenitifes the
	crash by setting CRASH_TOKEN=

	Options:

	-M1: enable a loop around the remove minimizer, which may help decrease some
	timing dependent instructions. Off by default.
	-M2: try setting bits in operand of write/out to zero. Off by default.

	""".format((sys.argv[0])))

	deduplication_note = """\n\
	Note: While trimming the input, sometimes the mutated trace triggers a different
	type crash but indicates the same bug. Under this situation, our minimizer is
	incapable of recognizing and stopped from removing it. In the future, we may
	use a more sophisticated crash case deduplication method.
	\n"""

	def check_if_trace_crashes(trace, path):
	with open(path, "w") as tracefile:
	tracefile.write("".join(trace))

	rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
	< {trace_path}".format(timeout=TIMEOUT,
	qemu_path=QEMU_PATH,
	qemu_args=QEMU_ARGS,
	trace_path=path),
	shell=True,
	stdin=subprocess.PIPE,
	stdout=subprocess.PIPE,
	encoding="utf-8")
	global CRASH_TOKEN
	if CRASH_TOKEN is None:
	try:
	outs, _ = rc.communicate(timeout=5)
	CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
	except subprocess.TimeoutExpired:
	print("subprocess.TimeoutExpired")
	return False
	print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
	global deduplication_note
	print(deduplication_note)
	return True

	for line in iter(rc.stdout.readline, ""):
	if "CLOSED" in line:
	return False
	if CRASH_TOKEN in line:
	return True

	print("\nWarning:")
	print(" There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
	print(" Usually this indicates a different type of crash.\n")
	return False


	# If previous write commands write the same length of data at the same
	# interval, we view it as a hint.
	def split_write_hint(newtrace, i):
	HINT_LEN = 3 # > 2
	if i <=(HINT_LEN-1):
	return None

	#find previous continuous write traces
	k = 0
	l = i-1
	writes = []
	while (k != HINT_LEN and l >= 0):
	if newtrace[l].startswith("write "):
	writes.append(newtrace[l])
	k += 1
	l -= 1
	elif newtrace[l] == "":
	l -= 1
	else:
	return None
	if k != HINT_LEN:
	return None

	length = int(writes[0].split()[2], 16)
	for j in range(1, HINT_LEN):
	if length != int(writes[j].split()[2], 16):
	return None

	step = int(writes[0].split()[1], 16) - int(writes[1].split()[1], 16)
	for j in range(1, HINT_LEN-1):
	if step != int(writes[j].split()[1], 16) - \
	int(writes[j+1].split()[1], 16):
	return None

	return (int(writes[0].split()[1], 16)+step, length)


	def remove_lines(newtrace, outpath):
	remove_step = 1
	i = 0
	while i < len(newtrace):
	# 1.) Try to remove lines completely and reproduce the crash.
	# If it works, we're done.
	if (i+remove_step) >= len(newtrace):
	remove_step = 1
	prior = newtrace[i:i+remove_step]
	for j in range(i, i+remove_step):
	newtrace[j] = ""
	print("Removing {lines} ...\n".format(lines=prior))
	if check_if_trace_crashes(newtrace, outpath):
	i += remove_step
	# Double the number of lines to remove for next round
	remove_step *= 2
	continue
	# Failed to remove multiple IOs, fast recovery
	if remove_step > 1:
	for j in range(i, i+remove_step):
	newtrace[j] = prior[j-i]
	remove_step = 1
	continue
	newtrace[i] = prior[0] # remove_step = 1

	# 2.) Try to replace write{bwlq} commands with a write addr, len
	# command. Since this can require swapping endianness, try both LE and
	# BE options. We do this, so we can "trim" the writes in (3)

	if (newtrace[i].startswith("write") and not
	newtrace[i].startswith("write ")):
	suffix = newtrace[i].split()[0][-1]
	assert(suffix in write_suffix_lookup)
	addr = int(newtrace[i].split()[1], 16)
	value = int(newtrace[i].split()[2], 16)
	for endianness in ['<', '>']:
	data = struct.pack("{end}{size}".format(end=endianness,
	size=write_suffix_lookup[suffix][1]),
	value)
	newtrace[i] = "write {addr} {size} 0x{data}\n".format(
	addr=hex(addr),
	size=hex(write_suffix_lookup[suffix][0]),
	data=data.hex())
	if(check_if_trace_crashes(newtrace, outpath)):
	break
	else:
	newtrace[i] = prior[0]

	# 3.) If it is a qtest write command: write addr len data, try to split
	# it into two separate write commands. If splitting the data operand
	# from length/2^n bytes to the left does not work, try to move the pivot
	# to the right side, then add one to n, until length/2^n == 0. The idea
	# is to prune unnecessary bytes from long writes, while accommodating
	# arbitrary MemoryRegion access sizes and alignments.

	# This algorithm will fail under some rare situations.
	# e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)

	if newtrace[i].startswith("write "):
	addr = int(newtrace[i].split()[1], 16)
	length = int(newtrace[i].split()[2], 16)
	data = newtrace[i].split()[3][2:]
	if length > 1:

	# Can we get a hint from previous writes?
	hint = split_write_hint(newtrace, i)
	if hint is not None:
	hint_addr = hint[0]
	hint_len = hint[1]
	if hint_addr >= addr and hint_addr+hint_len <= addr+length:
	newtrace[i] = "write {addr} {size} 0x{data}\n".format(
	addr=hex(hint_addr),
	size=hex(hint_len),
	data=data[(hint_addr-addr)*2:\
	(hint_addr-addr)2+hint_len2])
	if check_if_trace_crashes(newtrace, outpath):
	# next round
	i += 1
	continue
	newtrace[i] = prior[0]

	# Try splitting it using a binary approach
	leftlength = int(length/2)
	rightlength = length - leftlength
	newtrace.insert(i+1, "")
	power = 1
	while leftlength > 0:
	newtrace[i] = "write {addr} {size} 0x{data}\n".format(
	addr=hex(addr),
	size=hex(leftlength),
	data=data[:leftlength*2])
	newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
	addr=hex(addr+leftlength),
	size=hex(rightlength),
	data=data[leftlength*2:])
	if check_if_trace_crashes(newtrace, outpath):
	break
	# move the pivot to right side
	if leftlength < rightlength:
	rightlength, leftlength = leftlength, rightlength
	continue
	power += 1
	leftlength = int(length/pow(2, power))
	rightlength = length - leftlength
	if check_if_trace_crashes(newtrace, outpath):
	i -= 1
	else:
	newtrace[i] = prior[0]
	del newtrace[i+1]
	i += 1


	def clear_bits(newtrace, outpath):
	# try setting bits in operands of out/write to zero
	i = 0
	while i < len(newtrace):
	if (not newtrace[i].startswith("write ") and not
	newtrace[i].startswith("out")):
	i += 1
	continue
	# write ADDR SIZE DATA
	# outx ADDR VALUE
	print("\nzero setting bits: {}".format(newtrace[i]))

	prefix = " ".join(newtrace[i].split()[:-1])
	data = newtrace[i].split()[-1]
	data_bin = bin(int(data, 16))
	data_bin_list = list(data_bin)

	for j in range(2, len(data_bin_list)):
	prior = newtrace[i]
	if (data_bin_list[j] == '1'):
	data_bin_list[j] = '0'
	data_try = hex(int("".join(data_bin_list), 2))
	# It seems qtest only accepts padded hex-values.
	if len(data_try) % 2 == 1:
	data_try = data_try[:2] + "0" + data_try[2:]

	newtrace[i] = "{prefix} {data_try}\n".format(
	prefix=prefix,
	data_try=data_try)

	if not check_if_trace_crashes(newtrace, outpath):
	data_bin_list[j] = '1'
	newtrace[i] = prior
	i += 1


	def minimize_trace(inpath, outpath):
	global TIMEOUT
	with open(inpath) as f:
	trace = f.readlines()
	start = time.time()
	if not check_if_trace_crashes(trace, outpath):
	sys.exit("The input qtest trace didn't cause a crash...")
	end = time.time()
	print("Crashed in {} seconds".format(end-start))
	TIMEOUT = (end-start)*5
	print("Setting the timeout for {} seconds".format(TIMEOUT))

	newtrace = trace[:]
	global M1, M2

	# remove lines
	old_len = len(newtrace) + 1
	while(old_len > len(newtrace)):
	old_len = len(newtrace)
	print("trace length = ", old_len)
	remove_lines(newtrace, outpath)
	if not M1 and not M2:
	break
	newtrace = list(filter(lambda s: s != "", newtrace))
	assert(check_if_trace_crashes(newtrace, outpath))

	# set bits to zero
	if M2:
	clear_bits(newtrace, outpath)
	assert(check_if_trace_crashes(newtrace, outpath))


	if __name__ == '__main__':
	if len(sys.argv) < 3:
	usage()
	if "-M1" in sys.argv:
	M1 = True
	if "-M2" in sys.argv:
	M2 = True
	QEMU_PATH = os.getenv("QEMU_PATH")
	QEMU_ARGS = os.getenv("QEMU_ARGS")
	if QEMU_PATH is None or QEMU_ARGS is None:
	usage()
	# if "accel" not in QEMU_ARGS:
	# QEMU_ARGS += " -accel qtest"
	CRASH_TOKEN = os.getenv("CRASH_TOKEN")
	QEMU_ARGS += " -qtest stdio -monitor none -serial none "
	minimize_trace(sys.argv[-2], sys.argv[-1])