Merge pull request #28 from gottesmm/pr-0a55d87a0272d595f447879875576af64c1da6ea Add a new test that makes sure that on Linux, all libraries in a snapshot do not load any memory as writeable and executable.

commit: c45f48620e7a759b4d7041115581d988cf15b736 [log] [tgz]
author: Michael Gottesman <gottesmm@users.noreply.github.com> Thu May 03 21:39:39 2018 -0700
committer: GitHub <noreply@github.com> Thu May 03 21:39:39 2018 -0700
tree: 258eadf14b7bb9a8d2983a859d2445b44757344e
parent: c5672915ab0970a48b15df281bcc61f5097328e8 [diff]
parent: 3c419c108f5fd941be0c309e413b2d8aeab8ffda [diff]
diff --git a/lit.cfg b/lit.cfg
index d62de07..79adc57 100644
--- a/lit.cfg
+++ b/lit.cfg

@@ -139,6 +139,10 @@
 if llvm_bin_dir is None:
     lit_config.fatal("'--param llvm_bin_dir=PATH' is required")
 filecheck_path = os.path.join(llvm_bin_dir, 'FileCheck')
+readelf_path = os.path.join(llvm_bin_dir, 'llvm-readelf')
+lit_config.note("testing using 'FileCheck': %r" % (filecheck_path,))
+lit_config.note("testing using 'readelf': %r" % (readelf_path,))
+
 
 # Use the default Swift src layout if swiftpm is not provided as a
 # param
@@ -150,7 +154,6 @@
     config.substitutions.append( ('%{swiftpm_srcdir}', swiftpm_srcdir) )
     
 # Find the tools we need.
-lit_config.note("testing using 'FileCheck': %r" % (filecheck_path,))
 
 swift_path = lit_config.params.get(
     "swift",
@@ -182,11 +185,13 @@
 
 # Define our supported substitutions.
 config.substitutions.append( ('%{package_path}', package_path) )
+config.substitutions.append( ('%{python}', sys.executable) )
 config.substitutions.append( ('%{not}', os.path.join(srcroot, "not")) )
 config.substitutions.append( ('%{lldb}', lldb_path) )
 config.substitutions.append( ('%{swift}', swift_path) )
 config.substitutions.append( ('%{swiftc}', swiftc_path) )
 config.substitutions.append( ('%{FileCheck}', filecheck_path) )
+config.substitutions.append( ('%{readelf}', readelf_path) )
 
 # Add substitutions for swiftpm executables.
 swiftpm_build = lit_config.params.get("swiftpm-build")
@@ -203,6 +208,6 @@
 ###
 
 # Protected against unquoted use of substitutions.
-for name in ('swift-build', 'FileCheck'):
+for name in ('swift-build', 'FileCheck', 'readelf'):
   config.substitutions.append((' {0} '.format(name),
                                ' unquoted-command-name-{0} '.format(name)))

diff --git a/test-snapshot-binaries/linux_load_commands.py b/test-snapshot-binaries/linux_load_commands.py
new file mode 100644
index 0000000..f7d044e
--- /dev/null
+++ b/test-snapshot-binaries/linux_load_commands.py

@@ -0,0 +1,161 @@
+
+# REQUIRES: platform=Linux
+# RUN: rm -rf %T && mkdir -p %t
+# RUN: %{python} %s '%{package_path}' '%T' '%{readelf}'
+
+# Test that none of the Linux libraries that we provide contains a LOAD
+# program header that is both writable and executable.
+
+import argparse
+import re
+import sys
+import subprocess
+
+# For each library, we run llvm-readelf on it and verify that no LOAD entry
+# has a Flg field marking it as both writable (W) and executable (E). The
+# output we parse looks like this:
+#
+# ----
+# There are 7 program headers, starting at offset 64
+#
+# Program Headers:
+#   Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+#   PHDR           0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R   0x8
+#   LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x9839a0 0x9839a0 R E 0x1000
+#   LOAD           0x983a60 0x0000000000984a60 0x0000000000984a60 0x07ad78 0x0a3da9 RW  0x1000
+#   DYNAMIC        0x9b5b88 0x00000000009b6b88 0x00000000009b6b88 0x0002f0 0x0002f0 RW  0x8
+#   GNU_EH_FRAME   0x95ecd4 0x000000000095ecd4 0x000000000095ecd4 0x024ccc 0x024ccc R   0x4
+#   GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x0
+#   GNU_RELRO      0x983a60 0x0000000000984a60 0x0000000000984a60 0x0345a0 0x0345a0 RW  0x10
+# ----
+#
+# TODO: Evaluate if parallelism helps here. We /could/ use libdispatch to work
+# in parallel over all artifacts.
+class ParseState(object):
+    firstLine = 0
+    programHeadersLine = 1
+    dataHeader = 2
+    data = 3
+
+    def __init__(self, state=None):
+        if state is None:
+            state = ParseState.firstLine
+        self.value = state
+
+    @property
+    def regex_string(self):
+        if self.value == ParseState.firstLine:
+            return "There are (\d+) program headers"
+        if self.value == ParseState.programHeadersLine:
+            return "Program Headers:"
+        if self.value == ParseState.dataHeader:
+            return "\\s+Type"
+        if self.value == ParseState.data:
+            name = "(\w+)"
+            hex_pattern = "0x[0-9a-fA-F]+"
+            ws = "\s"
+            col = "{}+{}".format(ws, hex_pattern)
+            return "^{ws}*{name}{col}{col}{col}{col}{col} (.+) 0x".format(**
+                {'ws': ws, 'name': name, 'col': col})
+        raise RuntimeError('Invalid ParseState value')
+
+    @property
+    def regex(self):
+        return re.compile(self.regex_string)
+
+    @property
+    def next(self):
+        if self.value == ParseState.firstLine:
+            return ParseState(ParseState.programHeadersLine)
+        if self.value == ParseState.programHeadersLine:
+            return ParseState(ParseState.dataHeader)
+        if self.value == ParseState.dataHeader:
+            return ParseState(ParseState.data)
+        if self.value == ParseState.data:
+            return self
+        raise RuntimeError('Invalid ParseState value')
+
+    def matches(self, input_string):
+        return self.regex.match(input_string)
+
+def process_library(args, lib):
+    assert(len(lib) > 0)
+
+    numberOfLines = None
+    numberOfLinesSeen = 0
+
+    print("Visiting lib: {}".format(lib))
+    lines = list(reversed(subprocess.check_output([args.read_elf, "-program-headers", lib]).split("\n")[:-1]))
+    p = ParseState()
+
+    # Until we finish parsing or run out of lines to parse...
+    while len(lines) > 0:
+        l = lines.pop()
+        print("DUMP: '{}'".format(l))
+        assert(p is not None)
+        curState = p
+
+        m = curState.matches(l)
+        if m is None:
+            continue
+
+        p = curState.next
+        if curState.value == ParseState.firstLine:
+            numberOfLines = int(m.group(1))
+            continue
+
+        if curState.value == ParseState.programHeadersLine:
+            continue
+
+        if curState.value == ParseState.dataHeader:
+            continue
+
+        if curState.value == ParseState.data:
+            val = m.group(1)
+            if val == "LOAD":
+                flags = m.group(2)
+                print("Found LOAD command! Flags: '{}'. Full match: '{}'".format(flags, l))
+                if "W" in flags and "E" in flags:
+                    raise RuntimeError("Found a load command that loads something executable and writeable")
+
+            # If we haven't seen enough lines, continue.
+            assert(numberOfLines is not None)
+            if numberOfLinesSeen != numberOfLines - 1:
+                numberOfLinesSeen += 1
+                continue
+
+            # If we have seen enough lines, be sure to not only break out
+            # of the switch, but additionally break out of the whole
+            # parsing loop. We could go through the rest of the output from
+            # llvm-readelf, but there isn't any point.
+            p = None
+            break
+
+    # If we ran out of lines to parse without finishing parsing, we failed.
+    assert(p is None)
+    assert(numberOfLines is not None)
+    assert(numberOfLinesSeen == numberOfLines - 1)
+
+def get_libraries(package_path):
+    cmd = [
+        "/usr/bin/find",
+        package_path,
+        "-iname",
+        "*.so"
+    ]
+    return subprocess.check_output(cmd).split("\n")[:-1]
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('package_path')
+    parser.add_argument('tmp_dir')
+    parser.add_argument('read_elf')
+    args = parser.parse_args()
+
+    libraries = get_libraries(args.package_path)
+    for l in libraries:
+          process_library(args, l)
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
commit	c45f48620e7a759b4d7041115581d988cf15b736	[log] [tgz]
author	Michael Gottesman <gottesmm@users.noreply.github.com>	Thu May 03 21:39:39 2018 -0700
committer	GitHub <noreply@github.com>	Thu May 03 21:39:39 2018 -0700
tree	258eadf14b7bb9a8d2983a859d2445b44757344e
parent	c5672915ab0970a48b15df281bcc61f5097328e8 [diff]
parent	3c419c108f5fd941be0c309e413b2d8aeab8ffda [diff]