| #!/usr/bin/env python3.8 |
| # Copyright 2020 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import os |
| import datetime |
| import errno |
| import subprocess |
| |
| |
class Corpus(object):
    """Represents a corpus of fuzzing inputs.

    A fuzzing corpus is the set of "interesting" inputs as determined by the
    individual fuzzer. See https://llvm.org/docs/LibFuzzer.html#corpus for
    details on how libFuzzer uses corpora.

    Attributes:
        fuzzer: The Fuzzer corresponding to this object.
        nspaths: Paths in the namespace where the working and/or seed corpus
            are stored.
        srcdir: Path in the source tree where the seed corpus is stored, or
            None if it could not be determined from the GN label.
    """

    def __init__(self, fuzzer, label):
        """Creates a corpus for `fuzzer` from an optional GN `label`.

        Parameters:
            fuzzer: The Fuzzer that owns this corpus.
            label: GN label of the corpus target, e.g. "//path/to/corpus" or
                "//path/to:target". May be empty/None when no corpus is set.
        """
        self._fuzzer = fuzzer
        self._nspaths = None
        self._srcdir = None
        self._pkgdir = None
        self._target = None
        if not label:
            return

        # Parse GN label: "dir:target", or just "dir" (target == last segment).
        if ':' in label:
            target_dir, target = label.rsplit(':', 1)
        else:
            target_dir = label
            target = os.path.basename(label)

        if os.path.basename(target_dir) == target:
            # We assume that a label of this form corresponds to a corpus
            # directory, i.e. one containing only corpus elements and a
            # BUILD.gn for the resource() target.
            self._srcdir = target_dir
            self._pkgdir = target_dir
        else:
            # A label in this form doesn't correspond cleanly with a corpus
            # directory, so we can't auto-determine a srcdir.
            self._pkgdir = target_dir + "/" + target

        self._target = target

    @property
    def fuzzer(self):
        """The Fuzzer corresponding to this object."""
        return self._fuzzer

    @property
    def buildenv(self):
        """The BuildEnv corresponding to this object."""
        return self.fuzzer.buildenv

    @property
    def host(self):
        """Alias for fuzzer.host."""
        return self.fuzzer.host

    @property
    def ns(self):
        """Alias for fuzzer.ns."""
        return self.fuzzer.ns

    @property
    def srcdir(self):
        """Corpus directory in the source tree."""
        return self._srcdir

    @property
    def nspaths(self):
        """List of paths to where the corpus is stored on device.

        The first element is the mutable corpus. The list is computed lazily
        on first access (and again after `reset`).
        """
        if not self._nspaths:
            self.find_on_device()
        return self._nspaths

    def find_on_device(self):
        """Creates the mutable corpus directory and records the corpus paths.

        Populates the list returned by `nspaths`: the data directory first,
        followed by the packaged resource path when a corpus label was given.
        """
        data = self.ns.data('corpus')
        self.ns.mkdir(data)
        if self._pkgdir:
            resource = self.ns.resource(self._pkgdir)
            self._nspaths = [data, resource]
        else:
            self._nspaths = [data]

    def reset(self):
        """Remove any elements from the live corpus."""
        self.ns.remove(self.ns.data('corpus'), recursive=True)
        # Force `nspaths` to re-create the directory on next access.
        self._nspaths = None

    def add_from_host(self, pathname):
        """Copies elements from a host directory to the corpus on a device.

        Parameters:
            pathname: Host directory whose contents should be copied.

        Returns:
            Whatever `ns.store` returns for the copied elements.
        """
        self.fuzzer.require_stopped()
        if not self.host.isdir(pathname):
            self.host.error('No such directory: {}'.format(pathname))
        pathname = os.path.join(pathname, '*')
        return self.ns.store(self.nspaths[0], pathname)

    def add_from_gcs(self, gcs_url):
        """Copies corpus elements from a GCS bucket to this corpus.

        Parameters:
            gcs_url: URL of the bucket directory (or a wildcard ending in '*').

        Returns:
            The result of `add_from_host` on the downloaded elements.
        """
        if not gcs_url.endswith('*'):
            # Drop trailing slashes first so that "gs://bucket/dir/" does not
            # become "gs://bucket/dir//*", which names a different prefix.
            gcs_url = gcs_url.rstrip('/') + '/*'
        with self.host.temp_dir() as temp_dir:
            cmd = ['gsutil', '-m', 'cp', gcs_url, temp_dir.pathname]
            try:
                self.host.create_process(cmd).check_call()
            except OSError as e:
                # Only a missing executable is reported nicely; anything else
                # is unexpected and propagates.
                if e.errno != errno.ENOENT:
                    raise
                self.host.error(
                    'Unable to find "gsutil", which is needed to download the corpus from GCS.',
                    'You can skip downloading from GCS with the "--local" flag.'
                )
            except subprocess.CalledProcessError:
                self.host.error(
                    'Failed to download corpus from GCS.',
                    'You can skip downloading from GCS with the "--local" flag.'
                )
            return self.add_from_host(temp_dir.pathname)

    def measure(self):
        """Returns the number of corpus elements and corpus size as a pair."""
        total_num = 0
        total_size = 0
        for nspath in self.nspaths:
            sizes = self.ns.ls(nspath)
            total_num += len(sizes)
            total_size += sum(sizes.values())
        return (total_num, total_size)

    @staticmethod
    def _without_resource_section(gn_lines, resource_line):
        """Returns the lines of a BUILD.gn minus the section for one target.

        Sections are runs of lines delimited by blank lines at file scope
        (i.e. outside any braces). The section containing `resource_line` is
        dropped; every other section is kept, each followed by a blank line.
        """
        kept = []
        section = []
        depth = 0
        keep_section = True
        for raw in gn_lines:
            line = raw.rstrip()
            if line == resource_line:
                # Omit the section with the matching resource() target. The
                # line still falls through to the brace accounting below;
                # otherwise its '{' is never counted, the depth goes negative
                # at the closing '}', and every later section is swallowed.
                keep_section = False

            if line != '' or depth != 0:
                depth += line.count('{') - line.count('}')
                section.append(line)
                continue

            # Blank line at file scope: the current section is complete.
            if keep_section:
                kept += section
                kept.append('')
            section = []
            keep_section = True

        if keep_section and section:
            kept += section
            kept.append('')
        return kept

    def generate_buildfile(self, build_gn=None):
        """Generates a BUILD.gn file for the seed corpus.

        Seed corpora are included in the source tree. In order to correctly
        update packages including corpora, GN needs a listing of all the files
        being packaged. This function can generate the necessary GN target on a
        per-fuzzer basis. More than one fuzzer may use the same corpus. A fuzzer
        package may also include several corpora, with each separate GN target
        resulting in a different path in the package.

        If the BUILD.gn file already exists, any section for this corpus'
        resource() target is replaced and all other sections are preserved.

        Parameters:
            build_gn: Specifies where on the host filesystem the BUILD.gn file
                should be generated. Defaults to the corpus source directory
                itself (the GN file will be excluded from the list of corpus
                elements).

        Returns:
            The list of elements found in the corpus, as paths relative to the
            directory containing the BUILD.gn file.
        """
        if self._srcdir:
            srcdir = self.buildenv.abspath(self._srcdir)
            pkgdir = self._pkgdir
            target = self._target
        elif self._pkgdir:
            # NOTE(review): the code below relies on host.error() not
            # returning (srcdir would be unbound otherwise) -- confirm.
            self.host.error(
                'Automatic buildfile generation not available for ' +
                'corpus labels that don\'t correspond to a directory.')
        else:
            self.host.echo('No corpus set for {}.'.format(str(self.fuzzer)))
            self.host.echo('Please enter a path to a corpus: ', end='')
            srcdir = input()
            srcdir = self.buildenv.abspath(srcdir)
            # Resolved below, once the directory is known to exist.
            pkgdir = None
            target = os.path.basename(srcdir)
        if not self.host.isdir(srcdir):
            self.host.error('No such directory: {}'.format(srcdir))

        if build_gn:
            build_gn = self.buildenv.abspath(build_gn)
            comment = '# Generated using `fx fuzz update {} -o {}`.'.format(
                str(self.fuzzer), self.buildenv.srcpath(build_gn))
        else:
            build_gn = os.path.join(srcdir, 'BUILD.gn')
            comment = '# Generated using `fx fuzz update {}`.'.format(
                str(self.fuzzer))
        build_gn_dir = os.path.dirname(build_gn)

        # Corpus elements are the regular files directly inside srcdir, listed
        # relative to the BUILD.gn's directory; the BUILD.gn itself is skipped.
        elems = sorted(
            os.path.relpath(elem, build_gn_dir)
            for elem in self.host.glob(os.path.join(srcdir, '*'))
            if self.host.isfile(elem) and elem != build_gn)

        resource_line = 'resource("{}") {{'.format(target)
        src_label = self.buildenv.srcpath(srcdir)
        if pkgdir is None:
            # The package path is derived by stripping a leading "//" below,
            # so it must be the source-tree form rather than the absolute host
            # path. Assumes srcpath() yields a "//"-prefixed path like the GN
            # labels handled in __init__ -- TODO confirm.
            pkgdir = src_label

        if self.host.isfile(build_gn):
            with self.host.open(build_gn) as gn:
                lines_out = self._without_resource_section(gn, resource_line)
        else:
            year = datetime.datetime.now().year
            lines_out = [
                '# Copyright {} The Fuchsia Authors. All rights reserved.'.
                format(year),
                '# Use of this source code is governed by a BSD-style license that can be',
                '# found in the LICENSE file.',
                '',
                '# WARNING: AUTOGENERATED FILE. DO NOT EDIT BY HAND.',
                '',
                'import("//build/dist/resource.gni")',
                '',
            ]

        lines_out += [comment, resource_line]
        # It's not too much extra work to stay consistent with GN formatting.
        if not elems:
            lines_out.append(' sources = []')
        elif len(elems) == 1:
            lines_out.append(' sources = [ "{}" ]'.format(elems[0]))
        else:
            lines_out.append(' sources = [')
            lines_out += [' "{}",'.format(elem) for elem in elems]
            lines_out.append(' ]')
        lines_out += [
            ' outputs = [ "data/{}/{{{{source_file_part}}}}" ]'.format(
                pkgdir[2:]),
            '}',
            '',
        ]
        with self.host.open(build_gn, 'w') as gn:
            gn.write('\n'.join(lines_out))

        if not self._srcdir and not self._pkgdir:
            # No GN metadata for the corpus was detected, so we should try to
            # add it.
            if not self.fuzzer.add_corpus_to_buildfile(src_label):
                self.host.error(
                    'Failed to automatically add \'corpus = "{}"\'.'.format(
                        src_label),
                    'Please add the corpus parameter to {} manually.'.format(
                        str(self.fuzzer)))

        return elems