// blob: 0a812afbf6d06f54077b961b638364aaf11f5676
//===--- SILFunctionExtractor.cpp - SIL function extraction utility -------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This utility is meant to help simplify the extraction of test cases from SIL
/// files by removing (currently only) functions that do not match a list of
/// strings. It also allows for the inverse to be selected. Eventually this
/// should have additional capabilities like stripping globals, vtables, etc.
///
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sil-func-extractor"
#include "swift/Strings.h"
#include "swift/Basic/LLVM.h"
#include "swift/Basic/LLVMInitialize.h"
#include "swift/Demangling/Demangle.h"
#include "swift/Demangling/ManglingMacros.h"
#include "swift/Frontend/DiagnosticVerifier.h"
#include "swift/Frontend/Frontend.h"
#include "swift/Frontend/PrintingDiagnosticConsumer.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SIL/SILUndef.h"
#include "swift/SILOptimizer/Analysis/Analysis.h"
#include "swift/SILOptimizer/PassManager/PassManager.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/Serialization/SerializedModuleLoader.h"
#include "swift/Serialization/SerializationOptions.h"
#include "swift/Serialization/SerializedSILLoader.h"
#include "swift/Subsystems.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include <cstdio>
using namespace swift;
//===----------------------------------------------------------------------===//
// Command-line options
//===----------------------------------------------------------------------===//

// Positional argument naming the input file; defaults to "-".
static llvm::cl::opt<std::string> InputFilename(llvm::cl::desc("input file"),
llvm::cl::init("-"),
llvm::cl::Positional);
// -o: output path; defaults to "-", which main() treats as standard output
// when printing textual SIL.
static llvm::cl::opt<std::string>
OutputFilename("o", llvm::cl::desc("output filename"), llvm::cl::init("-"));
// -emit-verbose-sil: forwarded to SILModule::print() as its verbosity flag.
static llvm::cl::opt<bool>
EmitVerboseSIL("emit-verbose-sil",
llvm::cl::desc("Emit locations during sil emission."));
// -func: names (mangled or demangled) of the functions to extract.
static llvm::cl::list<std::string>
CommandLineFunctionNames("func",
llvm::cl::desc("Function names to extract."));
// -func-file: file read by getFunctionNames() for additional function names,
// one per line.
static llvm::cl::opt<std::string> FunctionNameFile(
"func-file", llvm::cl::desc("File to load additional function names from"));
// -emit-sib: serialize the resulting module instead of printing it as text.
static llvm::cl::opt<bool>
EmitSIB("emit-sib",
llvm::cl::desc("Emit a sib file as output instead of a sil file"));
// -invert: flips the keep/remove decision made in removeUnwantedFunctions().
static llvm::cl::opt<bool> InvertMatch(
"invert",
llvm::cl::desc("Match functions whose name do not "
"match the names of the functions to be extracted"));
// -I: directories handed to CompilerInvocation::setImportSearchPaths().
static llvm::cl::list<std::string>
ImportPaths("I",
llvm::cl::desc("add a directory to the import search path"));
// -module-name: passed to setUpInputForSILTool() together with the input file.
static llvm::cl::opt<std::string>
ModuleName("module-name",
llvm::cl::desc("The name of the module if processing"
" a module. Necessary for processing "
"stdin."));
// -module-cache-path: copied into the Clang importer options in main().
static llvm::cl::opt<std::string>
ModuleCachePath("module-cache-path",
llvm::cl::desc("Clang module cache path"));
// -resource-dir: when non-empty, used as the runtime resource path.
static llvm::cl::opt<std::string> ResourceDir(
"resource-dir",
llvm::cl::desc("The directory that holds the compiler resource files"));
// -sdk: SDK path; when the option is not given, main() falls back to the
// SDKROOT environment variable.
static llvm::cl::opt<std::string>
SDKPath("sdk", llvm::cl::desc("The path to the SDK for use with the clang "
"importer."),
llvm::cl::init(""));
// -target: when non-empty, used as the invocation's target triple.
static llvm::cl::opt<std::string> Triple("target",
llvm::cl::desc("target triple"));
// -emit-sorted-sil (hidden): forwarded to SILModule::print().
static llvm::cl::opt<bool>
EnableSILSortOutput("emit-sorted-sil", llvm::cl::Hidden,
llvm::cl::init(false),
llvm::cl::desc("Sort Functions, VTables, Globals, "
"WitnessTables by name to ease diffing."));
// -sil-disable-ast-dump (hidden): its negation is forwarded to
// SILModule::print().
static llvm::cl::opt<bool>
DisableASTDump("sil-disable-ast-dump", llvm::cl::Hidden,
llvm::cl::init(false),
llvm::cl::desc("Do not dump AST."));
// -assume-parsing-unqualified-ownership-sil (hidden): copied into the
// invocation's SILOptions before the compiler instance is set up.
static llvm::cl::opt<bool> AssumeUnqualifiedOwnershipWhenParsing(
"assume-parsing-unqualified-ownership-sil", llvm::cl::Hidden,
llvm::cl::init(false),
llvm::cl::desc("Assume all parsed functions have unqualified ownership"));
// -disable-sil-linking: note that this defaults to true. When set (or when the
// input is a SIB), main() loads serialized SIL for the main module only
// instead of calling SerializedSILLoader::getAll().
static llvm::cl::opt<bool>
DisableSILLinking("disable-sil-linking",
llvm::cl::init(true),
llvm::cl::desc("Disable SIL linking"));
/// Empty function whose only purpose is to provide an address that lies inside
/// the main executable; main() passes that address to
/// llvm::sys::fs::getMainExecutable().
///
/// This function isn't referenced outside its translation unit, but it
/// can't use the "static" keyword because its address is used for
/// getMainExecutable (since some platforms don't support taking the
/// address of main, and some platforms can't implement getMainExecutable
/// without being given the address of a function in the main executable).
void anchorForGetMainExecutable() {}
/// Collect the names of all functions requested on the command line.
///
/// Every value given via -func is appended to \p Names first. If -func-file
/// was specified, that file is then read and each newline-delimited token in
/// it is appended as an additional name.
///
/// \param Names  Output vector; existing contents are kept and new names are
///               appended to it.
///
/// Prints a diagnostic to stderr and exits the process with status -1 if the
/// function-name file cannot be opened.
static void getFunctionNames(std::vector<std::string> &Names) {
  std::copy(CommandLineFunctionNames.begin(), CommandLineFunctionNames.end(),
            std::back_inserter(Names));

  // Nothing more to do unless a file with additional names was supplied.
  if (FunctionNameFile.empty())
    return;

  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileBufOrErr =
      llvm::MemoryBuffer::getFileOrSTDIN(FunctionNameFile);
  if (!FileBufOrErr) {
    // Report the file we actually failed to open: the function-name file,
    // not the SIL input file.
    fprintf(stderr, "Error! Failed to open file: %s\n",
            FunctionNameFile.c_str());
    exit(-1);
  }

  StringRef Buffer = FileBufOrErr.get()->getBuffer();
  while (!Buffer.empty()) {
    // Peel off the next '\n'-delimited token; NewBuffer is the remainder.
    StringRef Token, NewBuffer;
    std::tie(Token, NewBuffer) = llvm::getToken(Buffer, "\n");
    // An empty token means no further name is left in the buffer.
    if (Token.empty())
      break;
    Names.push_back(Token.str());
    Buffer = NewBuffer;
  }
}
/// Binary-search \p list for an entry equivalent to \p str.
///
/// \param str   The string to look for.
/// \param list  Candidate strings; must already be ordered consistently with
///              \p cmp.
/// \param cmp   "Less than" predicate used both to locate the insertion point
///              and to test for equivalence.
///
/// \returns true if some element of \p list is neither less than nor greater
///          than \p str according to \p cmp, false otherwise (including when
///          \p list is empty).
static bool stringInSortedArray(
    StringRef str, ArrayRef<std::string> list,
    llvm::function_ref<bool(const std::string &, const std::string &)> &&cmp) {
  const auto last = list.end();
  // First element that is not ordered before str; equals `last` when the list
  // is empty or every element is ordered before str.
  const auto pos = std::lower_bound(list.begin(), last, str, cmp);
  // *pos is a match only if str is not ordered before it either.
  return pos != last && !cmp(str, *pos);
}
/// Strip from \p M every function that was not selected for extraction.
///
/// A function is selected when its mangled name is found in \p MangledNames or
/// when its demangled name (cut at the first ' ', '<' or '(') is found in
/// \p DemangledNames. The -invert option flips that decision. Both lists are
/// binary-searched via stringInSortedArray(), so callers must pass them sorted.
///
/// \param M               Module to modify in place; must be non-null.
/// \param MangledNames    Sorted mangled names of functions to match.
/// \param DemangledNames  Sorted demangled names of functions to match.
void removeUnwantedFunctions(SILModule *M, ArrayRef<std::string> MangledNames,
ArrayRef<std::string> DemangledNames) {
assert((!MangledNames.empty() || !DemangledNames.empty()) &&
"Expected names of function we want to retain!");
assert(M && "Expected a SIL module to extract from.");
// Functions with a body that have been gutted below and still need their
// linkage changed and their blocks cleared.
std::vector<SILFunction *> DeadFunctions;
for (auto &F : M->getFunctionList()) {
StringRef MangledName = F.getName();
std::string DemangledName =
swift::Demangle::demangleSymbolAsString(MangledName);
// Keep only the leading part of the demangled string, up to the first space,
// '<' or '('. If none of those occur the whole string is kept.
DemangledName = DemangledName.substr(0, DemangledName.find_first_of(" <("));
DEBUG(llvm::dbgs() << "Visiting New Func:\n"
<< " Mangled: " << MangledName
<< "\n Demangled: " << DemangledName << "\n");
bool FoundMangledName = stringInSortedArray(MangledName, MangledNames,
std::less<std::string>());
// Demangled names are compared only up to their first space.
bool FoundDemangledName = stringInSortedArray(
DemangledName, DemangledNames,
[](const std::string &str1, const std::string &str2) -> bool {
return str1.substr(0, str1.find(' ')) <
str2.substr(0, str2.find(' '));
});
// Keep the function if it matched (or, with -invert, if it did not match).
if ((FoundMangledName || FoundDemangledName) ^ InvertMatch) {
DEBUG(llvm::dbgs() << " Not removing!\n");
continue;
}
DEBUG(llvm::dbgs() << " Removing!\n");
// If F has no body, there is nothing further to do.
if (!F.size())
continue;
SILBasicBlock &BB = F.front();
// Grab a location from the entry block's last instruction before the block
// is split; it is reused for the unreachable created below.
SILLocation Loc = BB.back().getLoc();
// Split the entry block at its first instruction.
// NOTE(review): presumably this moves the original instructions into a new
// block and leaves BB empty -- confirm against SILBasicBlock::split.
BB.split(BB.begin());
// Make terminator unreachable.
SILBuilder(&BB).createUnreachable(Loc);
DeadFunctions.push_back(&F);
}
// After running this pass all of the functions we will remove
// should consist only of one basic block terminated by
// UnreachableInst.
performSILDiagnoseUnreachable(M, nullptr);
// Now mark all of these functions as public and remove their bodies.
for (auto &F : DeadFunctions) {
F->setLinkage(SILLinkage::PublicExternal);
F->getBlocks().clear();
}
// Remove dead functions.
performSILDeadFunctionElimination(M);
}
/// Entry point of the SIL function extractor.
///
/// Parses the command line, configures a CompilerInvocation from the options,
/// loads the input (deserializing SIL when the input is a serialized module),
/// removes every function that was not selected via -func / -func-file (or
/// that was selected, when -invert is given), and finally writes the module
/// either as a SIB file (-emit-sib) or as textual SIL.
///
/// \returns 1 if compiler setup fails, if parsing reported an error, or if the
///          output file cannot be opened. When no function names were
///          requested, returns whether the AST context recorded an error.
///          Otherwise returns 0. Failure to open the input exits with -1.
int main(int argc, char **argv) {
  INITIALIZE_LLVM(argc, argv);

  llvm::cl::ParseCommandLineOptions(argc, argv, "Swift SIL Extractor\n");

  CompilerInvocation Invocation;

  Invocation.setMainExecutablePath(llvm::sys::fs::getMainExecutable(
      argv[0], reinterpret_cast<void *>(&anchorForGetMainExecutable)));

  // Give the context the list of search paths to use for modules.
  Invocation.setImportSearchPaths(ImportPaths);
  // Set the SDK path and target if given. Fall back to $SDKROOT when -sdk was
  // not passed on the command line.
  if (SDKPath.getNumOccurrences() == 0) {
    const char *SDKROOT = getenv("SDKROOT");
    if (SDKROOT)
      SDKPath = SDKROOT;
  }
  if (!SDKPath.empty())
    Invocation.setSDKPath(SDKPath);
  if (!Triple.empty())
    Invocation.setTargetTriple(Triple);
  if (!ResourceDir.empty())
    Invocation.setRuntimeResourcePath(ResourceDir);
  Invocation.getClangImporterOptions().ModuleCachePath = ModuleCachePath;
  Invocation.setParseStdlib();
  Invocation.getLangOptions().DisableAvailabilityChecking = true;
  Invocation.getLangOptions().EnableAccessControl = false;
  Invocation.getLangOptions().EnableObjCAttrRequiresFoundation = false;

  serialization::ExtendedValidationInfo extendedInfo;
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileBufOrErr =
      Invocation.setUpInputForSILTool(InputFilename, ModuleName, true,
                                      extendedInfo);
  if (!FileBufOrErr) {
    fprintf(stderr, "Error! Failed to open file: %s\n", InputFilename.c_str());
    exit(-1);
  }

  SILOptions &SILOpts = Invocation.getSILOptions();
  SILOpts.AssumeUnqualifiedOwnershipWhenParsing =
      AssumeUnqualifiedOwnershipWhenParsing;

  CompilerInstance CI;
  PrintingDiagnosticConsumer PrintDiags;
  CI.addDiagnosticConsumer(&PrintDiags);

  if (CI.setup(Invocation))
    return 1;
  CI.performSema();

  // If parsing produced an error, don't run any passes.
  if (CI.getASTContext().hadError())
    return 1;

  // Load the SIL if we have a module. We have to do this after SILParse
  // creating the unfortunate double if statement.
  if (Invocation.hasSerializedAST()) {
    assert(!CI.hasSILModule() &&
           "performSema() should not create a SILModule.");
    CI.setSILModule(
        SILModule::createEmptyModule(CI.getMainModule(), CI.getSILOptions()));
    std::unique_ptr<SerializedSILLoader> SL = SerializedSILLoader::create(
        CI.getASTContext(), CI.getSILModule(), nullptr);

    if (extendedInfo.isSIB() || DisableSILLinking)
      SL->getAllForModule(CI.getMainModule()->getName(), nullptr);
    else
      SL->getAll();
  }

  // Without any requested names there is nothing to extract.
  if (CommandLineFunctionNames.empty() && FunctionNameFile.empty())
    return CI.getASTContext().hadError();

  // For efficient usage, we separate our names into two separate sorted
  // lists, one of mangled names, and one of unmangled names.
  std::vector<std::string> Names;
  getFunctionNames(Names);

  // First partition our function names into mangled/demangled arrays.
  auto FirstDemangledName = std::partition(
      Names.begin(), Names.end(), [](const std::string &Name) -> bool {
        StringRef NameRef(Name);
        return NameRef.startswith("_T") ||
               NameRef.startswith(MANGLING_PREFIX_STR);
      });

  // Then grab offsets to avoid any issues with iterator invalidation when we
  // sort.
  unsigned NumMangled = std::distance(Names.begin(), FirstDemangledName);
  unsigned NumNames = Names.size();

  // Then sort the two partitioned arrays.
  std::sort(Names.begin(), FirstDemangledName);
  std::sort(FirstDemangledName, Names.end());

  // Finally construct our ArrayRefs into the sorted std::vector for our
  // mangled and demangled names. Use data() plus an offset rather than
  // dereferencing iterators: when every name is mangled (or the list is
  // empty) the corresponding iterator is end(), and dereferencing it is
  // undefined behavior.
  const std::string *NamesBegin = Names.data();
  ArrayRef<std::string> MangledNames(NamesBegin, NumMangled);
  ArrayRef<std::string> DemangledNames(NamesBegin + NumMangled,
                                       NumNames - NumMangled);

  DEBUG(llvm::errs() << "MangledNames to keep:\n";
        std::for_each(MangledNames.begin(), MangledNames.end(),
                      [](const std::string &str) {
                        llvm::errs() << " " << str << '\n';
                      }));
  DEBUG(llvm::errs() << "DemangledNames to keep:\n";
        std::for_each(DemangledNames.begin(), DemangledNames.end(),
                      [](const std::string &str) {
                        llvm::errs() << " " << str << '\n';
                      }));

  removeUnwantedFunctions(CI.getSILModule(), MangledNames, DemangledNames);

  if (EmitSIB) {
    // Pick the SIB output path: explicit -o first, then the module name given
    // on the command line, then the main module's own name.
    llvm::SmallString<128> OutputFile;
    if (OutputFilename.size()) {
      OutputFile = OutputFilename;
    } else if (ModuleName.size()) {
      OutputFile = ModuleName;
      llvm::sys::path::replace_extension(OutputFile, SIB_EXTENSION);
    } else {
      OutputFile = CI.getMainModule()->getName().str();
      llvm::sys::path::replace_extension(OutputFile, SIB_EXTENSION);
    }

    SerializationOptions serializationOpts;
    serializationOpts.OutputPath = OutputFile.c_str();
    serializationOpts.SerializeAllSIL = true;
    serializationOpts.IsSIB = true;

    serialize(CI.getMainModule(), serializationOpts, CI.getSILModule());
  } else {
    // Textual SIL: "-" (also used when -o is empty) means standard output.
    const StringRef OutputFile =
        OutputFilename.size() ? StringRef(OutputFilename) : "-";

    if (OutputFile == "-") {
      CI.getSILModule()->print(llvm::outs(), EmitVerboseSIL, CI.getMainModule(),
                               EnableSILSortOutput, !DisableASTDump);
    } else {
      std::error_code EC;
      llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::F_None);
      if (EC) {
        llvm::errs() << "while opening '" << OutputFile << "': " << EC.message()
                     << '\n';
        return 1;
      }
      CI.getSILModule()->print(OS, EmitVerboseSIL, CI.getMainModule(),
                               EnableSILSortOutput, !DisableASTDump);
    }
  }

  return 0;
}