| # -*- coding: utf-8 -*- |
| # The LLVM Compiler Infrastructure |
| # |
| # This file is distributed under the University of Illinois Open Source |
| # License. See LICENSE.TXT for details. |
| """ This module is responsible for capturing the compiler invocations of any |
| build process. The result of that should be a compilation database. |
| |
| This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES |
| mechanisms provided by the dynamic linker. The related library is implemented |
| in C language and can be found under 'libear' directory. |
| |
| The 'libear' library is capturing all child process creation and logging the |
| relevant information about it into separate files in a specified directory. |
| The parameter of this process is the output directory name, where the report |
| files shall be placed. This parameter is passed as an environment variable. |
| |
| The module also implements compiler wrappers to intercept the compiler calls. |
| |
| The module implements the build command execution and the post-processing of |
| the output files, which are condensed into a compilation database. """ |
| |
| import sys |
| import os |
| import os.path |
| import re |
| import itertools |
| import json |
| import glob |
| import argparse |
| import logging |
| import subprocess |
| from libear import build_libear, TemporaryDirectory |
| from libscanbuild import command_entry_point |
| from libscanbuild import duplicate_check, tempdir, initialize_logging |
| from libscanbuild.compilation import split_command |
| from libscanbuild.shell import encode, decode |
| |
| __all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper'] |
| |
# ASCII separator characters framing an exec report: the fields of a report
# are joined by RS, the report itself is closed by GS, and every argument of
# the command field is followed by US.
GS = '\x1d'  # group separator
RS = '\x1e'  # record separator
US = '\x1f'  # unit separator

# File names of the compiler wrapper executables.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
| |
| |
@command_entry_point
def intercept_build_main(bin_dir):
    """ Run the 'intercept-build' command line tool.

    Parses the process arguments, sets up logging from the verbosity flag
    and hands the work over to 'capture'. Without a build command only the
    usage is printed.

    :param bin_dir: passed on to 'capture' unchanged.
    :return: 0 when only the help was printed, otherwise the value
             returned by 'capture'. """

    arg_parser = create_parser()
    options = arg_parser.parse_args()

    initialize_logging(options.verbose)
    logging.debug('Parsed arguments: %s', options)

    if options.build:
        return capture(options, bin_dir)

    # no build command was given, there is nothing to intercept
    arg_parser.print_help()
    return 0
| |
| |
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command in an environment prepared by
    'setup_environment', reads the exec reports ('*.cmd' files) from a
    temporary directory and writes the result as JSON into 'args.cdb'.

    :param args: the parsed command line arguments. The 'build' and 'cdb'
                 attributes are read here; 'append' and 'raw_entries' are
                 optional.
    :param bin_dir: handed over to 'setup_environment'.
    :return: the exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        exit_code = subprocess.call(args.build, env=environment)
        logging.info('build finished with exit code: %d', exit_code)
        # read the intercepted exec calls. The names produced by glob
        # already start with 'tmp_dir', joining them to it again would
        # point to a wrong place when the directory name is relative.
        pattern = os.path.join(tmp_dir, '*.cmd')
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(pattern)))
        # do post processing only if that was requested
        if 'raw_entries' not in args or not args.raw_entries:
            entries = post_processing(exec_traces)
        else:
            entries = exec_traces
        # The entries are lazy: evaluate them before the output file gets
        # truncated, so a failure while reading the reports does not wipe
        # an already existing compilation database.
        result = list(entries)
        # dump the compilation database
        with open(args.cdb, 'w') as handle:
            json.dump(result, handle, sort_keys=True, indent=4)
        return exit_code
| |
| |
def setup_environment(args, destination, bin_dir):
    """ Create the environment for the build command.

    The result is a copy of the current process environment extended with
    the variables the interception needs. The exec calls are reported
    either by the preloaded 'libear' library or by the compiler wrappers.
    The wrappers are used when the compiler override was requested, when
    preloading is disabled on this platform, or when 'build_libear' gave
    no library path.

    :param args: parsed command line arguments; 'override_compiler' is
                 always read, 'verbose' only in the wrapper case, while
                 'cc' and 'cxx' are optional.
    :param destination: directory name exported to the build as
                        'INTERCEPT_BUILD_TARGET_DIR'; it is also handed
                        to 'build_libear'.
    :param bin_dir: directory which contains the compiler wrappers.
    :return: the environment as a dictionary. """

    if 'cc' in args:
        c_compiler = args.cc
    else:
        c_compiler = 'cc'
    if 'cxx' in args:
        cxx_compiler = args.cxx
    else:
        cxx_compiler = 'c++'

    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ, INTERCEPT_BUILD_TARGET_DIR=destination)

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment['CC'] = os.path.join(bin_dir, COMPILER_WRAPPER_CC)
        environment['CXX'] = os.path.join(bin_dir, COMPILER_WRAPPER_CXX)
        environment['INTERCEPT_BUILD_CC'] = c_compiler
        environment['INTERCEPT_BUILD_CXX'] = cxx_compiler
        if args.verbose > 2:
            environment['INTERCEPT_BUILD_VERBOSE'] = 'DEBUG'
        else:
            environment['INTERCEPT_BUILD_VERBOSE'] = 'INFO'
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment['DYLD_INSERT_LIBRARIES'] = libear_path
        environment['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment['LD_PRELOAD'] = libear_path

    return environment
| |
| |
def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    First it appends an exec report about this very process to a file in
    the target directory, then it runs the real compiler with the
    arguments the wrapper was called with. Target directory, log level and
    the real compilers are all taken from environment variables.

    The report has the fields of a 'libear' report; the parent process id
    is faked with the own process id and the function name is 'wrapper'.
    A failure to write the report is only logged, the compiler is executed
    regardless.

    :param cplusplus: true value selects the C++ compiler, false the C one.
    :return: the exit code of the real compiler. """

    # initialize wrapper logging
    log_level = os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO')
    logging.basicConfig(format='intercept: %(levelname)s: %(message)s',
                        level=log_level)
    # write report
    try:
        report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not report_dir:
            raise UserWarning('exec report target directory not found')
        own_pid = str(os.getpid())
        report_file = os.path.join(report_dir, own_pid + '.cmd')
        logging.debug('writing exec report to: %s', report_file)
        with open(report_file, 'ab') as handler:
            fields = [own_pid, own_pid, 'wrapper', os.getcwd(),
                      US.join(sys.argv) + US]
            report = RS.join(fields) + GS
            handler.write(report.encode('utf-8'))
    except IOError:
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    if cplusplus:
        compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++')
    else:
        compiler = os.getenv('INTERCEPT_BUILD_CC', 'cc')
    compilation = [compiler]
    compilation.extend(sys.argv[1:])
    logging.debug('execute compiler: %s', compilation)
    return subprocess.call(compilation)
| |
| |
def parse_exec_trace(filename):
    """ Read the exec reports from a file, one dictionary per report.

    The file is the one written by the interception library or by the
    compiler wrapper. It _might_ hold more than one report: each report is
    closed by GS, its fields are separated by RS and every argument of the
    command field is followed by US.

    :param filename: name of the report file.
    :return: generator of dictionaries with the 'pid', 'ppid', 'function',
             'directory' (all strings) and 'command' (list of strings)
             keys. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
        for report in content.split(GS):
            if not report:
                # nothing stands behind the closing separator
                continue
            fields = report.split(RS)
            yield dict(
                pid=fields[0],
                ppid=fields[1],
                function=fields[2],
                directory=fields[3],
                # the piece after the last US is not an argument
                command=fields[4].split(US)[:-1])
| |
| |
def format_entry(exec_trace):
    """ Turn an exec report into compilation database entries.

    The command of the report is given to 'split_command'. When that gives
    a false value nothing is generated; otherwise one entry is generated
    for each source file of the compilation.

    :param exec_trace: dictionary with the 'command' and 'directory' keys.
    :return: generator of dictionaries with the 'directory', 'command' and
             'file' keys, where 'file' is a normalized absolute path. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        # only two compiler names are written into the entries
        if compilation.compiler == 'c++':
            compiler = 'c++'
        else:
            compiler = 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)

        directory = exec_trace['directory']
        escaped = encode(command)
        # relative file names are relative to the working directory
        if os.path.isabs(source):
            fullname = source
        else:
            fullname = os.path.join(exec_trace['directory'], source)
        yield {
            'directory': directory,
            'command': escaped,
            'file': os.path.normpath(fullname)
        }
| |
| |
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Same problem on linux when SELinux is enabled. The status query program
    'sestatus' and the output when it's enabled 'SELinux status: enabled'.

    :param platform: platform name, in the form of 'sys.platform'.
    :return: True when the status query reports the protection as enabled.
             False on any other platform, and also when the query can not
             be executed or its output can not be processed. """

    if platform == 'darwin':
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        command = ['csrutil', 'status']
    elif platform in {'linux', 'linux2'}:
        pattern = re.compile(r'SELinux status:\s+enabled')
        command = ['sestatus']
    else:
        return False

    try:
        lines = subprocess.check_output(command).decode('utf-8')
        return any(pattern.match(line) for line in lines.splitlines())
    except Exception:
        # The query is best effort: a missing program, a non zero exit or
        # an undecodable output all mean "not detected". KeyboardInterrupt
        # and SystemExit are not caught, those shall reach the caller.
        logging.debug('status query failed: %s', command, exc_info=True)
        return False
| |
| |
def entry_hash(entry):
    """ Compute the key which identifies a compilation database entry.

    :param entry: dictionary with the 'file', 'directory' and 'command'
                  keys.
    :return: a string made of the reversed file name, the reversed
             directory name and the command without its first word,
             joined by '<>'. """

    # The names are stored reversed; this is intended to make the lookup
    # in a set faster.
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To treat those as the same
    # entry the first word of the command is left out of the key.
    arguments = decode(entry['command'])[1:]

    return '<>'.join((reversed_file, reversed_directory, ' '.join(arguments)))
| |
| |
def create_parser():
    """ Build the command line parser of 'intercept-build'.

    The parser knows the verbosity counter, the output file name, the
    mutually exclusive 'append' and 'disable filter' flags, the advanced
    compiler wrapper options, and takes every remaining argument as the
    build command.

    :return: the configured 'argparse.ArgumentParser' object. """

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    cli.add_argument(
        '--verbose', '-v',
        default=0,
        action='count',
        help="Enable verbose output from '%(prog)s'. A second and third\n"
             "flag increases verbosity.")
    cli.add_argument(
        '--cdb',
        default="compile_commands.json",
        metavar='<file>',
        help="The JSON compilation database.")

    # appending needs filtered entries, so the two flags exclude each other
    output_mode = cli.add_mutually_exclusive_group()
    output_mode.add_argument(
        '--append',
        action='store_true',
        help="Append new entries to existing compilation database.")
    output_mode.add_argument(
        '--disable-filter', '-n',
        action='store_true',
        dest='raw_entries',
        help="Intercepted child process creation calls (exec calls) are all\n"
             "logged to the output. The output is not a compilation "
             "database.\n"
             "This flag is for debug purposes.")

    wrapper_options = cli.add_argument_group('advanced options')
    wrapper_options.add_argument(
        '--override-compiler',
        action='store_true',
        help="Always resort to the compiler wrapper even when better\n"
             "intercept methods are available.")
    wrapper_options.add_argument(
        '--use-cc',
        dest='cc',
        default='cc',
        metavar='<path>',
        help="When '%(prog)s' analyzes a project by interposing a compiler\n"
             "wrapper, which executes a real compiler for compilation and\n"
             "do other tasks (record the compiler invocation). Because of\n"
             "this interposing, '%(prog)s' does not know what compiler "
             "your\n"
             "project normally uses. Instead, it simply overrides the CC\n"
             "environment variable, and guesses your default compiler.\n"
             "\n"
             "If you need '%(prog)s' to use a specific compiler for\n"
             "*compilation* then you can use this option to specify a "
             "path\n"
             "to that compiler.")
    wrapper_options.add_argument(
        '--use-c++',
        dest='cxx',
        default='c++',
        metavar='<path>',
        help='This is the same as "--use-cc" but for C++ code.')

    # everything after the options is the build command itself
    cli.add_argument(
        dest='build',
        nargs=argparse.REMAINDER,
        help="Command to run.")

    return cli