| #include "ThinLtoModuleIndex.h" |
| |
| #include "llvm/Bitcode/BitcodeReader.h" |
| #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" |
| #include "llvm/IR/LLVMContext.h" |
| #include "llvm/IRReader/IRReader.h" |
| #include "llvm/Support/SourceMgr.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| #include <memory> |
| #include <string> |
| |
| #define DEBUG_TYPE "thinltojit" |
| |
| namespace llvm { |
| namespace orc { |
| |
| Error ThinLtoModuleIndex::add(StringRef InputPath) { |
| auto Buffer = errorOrToExpected(MemoryBuffer::getFile(InputPath)); |
| if (!Buffer) |
| return Buffer.takeError(); |
| |
| Error ParseErr = readModuleSummaryIndex((*Buffer)->getMemBufferRef(), |
| CombinedSummaryIndex, NextModuleId); |
| if (ParseErr) |
| return ParseErr; |
| |
| #ifndef NDEBUG |
| auto Paths = getAllModulePaths(); |
| unsigned TotalPaths = Paths.size(); |
| std::sort(Paths.begin(), Paths.end()); |
| Paths.erase(std::unique(Paths.begin(), Paths.end()), Paths.end()); |
| assert(TotalPaths == Paths.size() && "Module paths must be unique"); |
| #endif |
| |
| ++NextModuleId; |
| return Error::success(); |
| } |
| |
| std::vector<StringRef> ThinLtoModuleIndex::getAllModulePaths() const { |
| auto ModuleTable = CombinedSummaryIndex.modulePaths(); |
| |
| std::vector<StringRef> Paths; |
| Paths.resize(ModuleTable.size()); |
| |
| for (const auto &KV : ModuleTable) { |
| assert(Paths[KV.second.first].empty() && "IDs are unique and continuous"); |
| Paths[KV.second.first] = KV.first(); |
| } |
| |
| return Paths; |
| } |
| |
| GlobalValueSummary * |
| ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const { |
| ValueInfo VI = CombinedSummaryIndex.getValueInfo(Function); |
| if (!VI || VI.getSummaryList().empty()) |
| return nullptr; |
| |
| // There can be more than one symbol with the same GUID, in the case of same- |
| // named locals in different but same-named source files that were compiled in |
| // their respective directories (so the source file name and resulting GUID is |
| // the same). We avoid this by checking that module paths are unique upon |
| // add(). |
| // |
| // TODO: We can still get duplicates on symbols declared with |
| // attribute((weak)), a GNU extension supported by gcc and clang. |
| // We should support it by looking for a symbol in the current module |
| // or in the same module as the caller. |
| assert(VI.getSummaryList().size() == 1 && "Weak symbols not yet supported"); |
| |
| return VI.getSummaryList().front().get()->getBaseObject(); |
| } |
| |
| Optional<StringRef> |
| ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const { |
| if (GlobalValueSummary *S = getSummary(GlobalValue::getGUID(Name))) |
| return S->modulePath(); |
| return None; // We don't know the symbol. |
| } |
| |
| void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) { |
| // Once the module was scheduled, we can call takeModule(). |
| auto ScheduledIt = ScheduledModules.find(Path); |
| if (ScheduledIt != ScheduledModules.end()) |
| return; |
| |
| auto Worker = [this](std::string Path) { |
| if (auto TSM = doParseModule(Path)) { |
| std::lock_guard<std::mutex> Lock(ParsedModulesLock); |
| ParsedModules[Path] = std::move(*TSM); |
| |
| LLVM_DEBUG(dbgs() << "Finished parsing module: " << Path << "\n"); |
| } else { |
| ES.reportError(TSM.takeError()); |
| } |
| }; |
| |
| LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n"); |
| ScheduledModules[Path] = ParseModuleWorkers.async(Worker, Path.str()); |
| } |
| |
| ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) { |
| std::unique_lock<std::mutex> ParseLock(ParsedModulesLock); |
| |
| auto ParsedIt = ParsedModules.find(Path); |
| if (ParsedIt == ParsedModules.end()) { |
| ParseLock.unlock(); |
| |
| // The module is not ready, wait for the future we stored. |
| std::unique_lock<std::mutex> ScheduleLock(ScheduledModulesLock); |
| auto ScheduledIt = ScheduledModules.find(Path); |
| assert(ScheduledIt != ScheduledModules.end() && |
| "Don't call for unscheduled modules"); |
| std::shared_future<void> Future = ScheduledIt->getValue(); |
| ScheduleLock.unlock(); |
| Future.get(); |
| |
| ParseLock.lock(); |
| ParsedIt = ParsedModules.find(Path); |
| assert(ParsedIt != ParsedModules.end() && "Must be ready now"); |
| } |
| |
| // We only add each module once. If it's not here anymore, we can skip it. |
| ThreadSafeModule TSM = std::move(ParsedIt->getValue()); |
| ParsedIt->getValue() = ThreadSafeModule(); |
| return TSM; |
| } |
| |
| ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) { |
| { |
| std::lock_guard<std::mutex> ScheduleLock(ScheduledModulesLock); |
| scheduleModuleParsingPrelocked(Path); |
| } |
| return takeModule(Path); |
| } |
| |
| Expected<ThreadSafeModule> ThinLtoModuleIndex::doParseModule(StringRef Path) { |
| // TODO: make a SMDiagnosticError class for this |
| SMDiagnostic Err; |
| auto Ctx = std::make_unique<LLVMContext>(); |
| auto M = parseIRFile(Path, Err, *Ctx); |
| if (!M) { |
| std::string ErrDescription; |
| { |
| raw_string_ostream S(ErrDescription); |
| Err.print("ThinLtoJIT", S); |
| } |
| return createStringError(inconvertibleErrorCode(), |
| "Failed to load module from file '%s' (%s)", |
| Path.data(), ErrDescription.c_str()); |
| } |
| |
| return ThreadSafeModule(std::move(M), std::move(Ctx)); |
| } |
| |
| // We don't filter visited functions. Discovery will often be retriggered |
| // from the middle of already visited functions and it aims to reach a little |
| // further each time. |
| void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S, |
| unsigned LookaheadLevels) { |
| // Populate initial worklist |
| std::vector<FunctionSummary *> Worklist; |
| addToWorklist(Worklist, S->calls()); |
| unsigned Distance = 0; |
| |
| while (++Distance < LookaheadLevels) { |
| // Process current worklist and populate a new one. |
| std::vector<FunctionSummary *> NextWorklist; |
| for (FunctionSummary *F : Worklist) { |
| updatePathRank(F->modulePath(), Distance); |
| addToWorklist(NextWorklist, F->calls()); |
| } |
| Worklist = std::move(NextWorklist); |
| } |
| |
| // Process the last worklist without filling a new one |
| for (FunctionSummary *F : Worklist) { |
| updatePathRank(F->modulePath(), Distance); |
| } |
| |
| // Reset counts for known paths (includes both, scheduled and parsed modules). |
| std::lock_guard<std::mutex> Lock(ScheduledModulesLock); |
| for (const auto &KV : ScheduledModules) { |
| PathRank[KV.first()].Count = 0; |
| } |
| } |
| |
| void ThinLtoModuleIndex::addToWorklist( |
| std::vector<FunctionSummary *> &List, |
| ArrayRef<FunctionSummary::EdgeTy> Calls) { |
| for (const auto &Edge : Calls) { |
| const auto &SummaryList = Edge.first.getSummaryList(); |
| if (!SummaryList.empty()) { |
| GlobalValueSummary *S = SummaryList.front().get()->getBaseObject(); |
| assert(isa<FunctionSummary>(S) && "Callees must be functions"); |
| List.push_back(cast<FunctionSummary>(S)); |
| } |
| } |
| } |
| |
| // PathRank is global and continuous. |
| void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) { |
| auto &Entry = PathRank[Path]; |
| Entry.Count += 1; |
| Entry.MinDist = std::min(Entry.MinDist, Distance); |
| assert(Entry.MinDist > 0 && "We want it as a divisor"); |
| } |
| |
| // TODO: The size of a ThreadPool's task queue is not accessible. It would |
| // be great to know in order to estimate how many modules we schedule. The |
| // more we schedule, the less precise is the ranking. The less we schedule, |
| // the higher the risk for downtime. |
| std::vector<std::string> ThinLtoModuleIndex::selectNextPaths() { |
| struct ScorePath { |
| float Score; |
| unsigned MinDist; |
| StringRef Path; |
| }; |
| |
| std::vector<ScorePath> Candidates; |
| Candidates.reserve(PathRank.size()); |
| for (const auto &KV : PathRank) { |
| float Score = static_cast<float>(KV.second.Count) / KV.second.MinDist; |
| if (Score > .0f) { |
| Candidates.push_back({Score, KV.second.MinDist, KV.first()}); |
| } |
| } |
| |
| // Sort candidates by descending score. |
| std::sort(Candidates.begin(), Candidates.end(), |
| [](const ScorePath &LHS, const ScorePath &RHS) { |
| return LHS.Score > RHS.Score; |
| }); |
| |
| // Sort highest score candidates by ascending minimal distance. |
| size_t Selected = |
| std::min(std::max<size_t>(NumParseModuleThreads, Candidates.size() / 2), |
| Candidates.size()); |
| std::sort(Candidates.begin(), Candidates.begin() + Selected, |
| [](const ScorePath &LHS, const ScorePath &RHS) { |
| return LHS.MinDist < RHS.MinDist; |
| }); |
| |
| std::vector<std::string> Paths; |
| Paths.reserve(Selected); |
| for (unsigned i = 0; i < Selected; i++) { |
| Paths.push_back(Candidates[i].Path.str()); |
| } |
| |
| LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size() << " out of " |
| << Candidates.size() << " discovered paths\n"); |
| |
| return Paths; |
| } |
| |
| unsigned ThinLtoModuleIndex::getNumDiscoveredModules() const { |
| // TODO: It would probably be more efficient to track the number of |
| // unscheduled modules. |
| unsigned NonNullItems = 0; |
| for (const auto &KV : PathRank) |
| if (KV.second.Count > 0) |
| ++NonNullItems; |
| return NonNullItems; |
| } |
| |
| } // namespace orc |
| } // namespace llvm |