Merge pull request #15722 from jckarter/deserialize-clang-importer-witness-tables-iii

 SIL: Force Clang-imported protocol conformances to get deserialized when used.
diff --git a/include/swift/SIL/SILModule.h b/include/swift/SIL/SILModule.h
index 6362909..fcb0c91 100644
--- a/include/swift/SIL/SILModule.h
+++ b/include/swift/SIL/SILModule.h
@@ -708,6 +708,11 @@
   bool isDefaultAtomic() const {
     return ! getOptions().AssumeSingleThreaded;
   }
+  
+  /// Returns true if SIL entities associated with declarations in the given
+  /// declaration context ought to be serialized as part of this module.
+  bool shouldSerializeEntitiesAssociatedWithDeclContext(const DeclContext *DC)
+    const;
 };
 
 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SILModule &M){
diff --git a/lib/SIL/Linker.cpp b/lib/SIL/Linker.cpp
index dd18be3..e2d36f8 100644
--- a/lib/SIL/Linker.cpp
+++ b/lib/SIL/Linker.cpp
@@ -18,6 +18,8 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Debug.h"
 #include "swift/AST/ProtocolConformance.h"
+#include "swift/AST/SubstitutionMap.h"
+#include "swift/ClangImporter/ClangModule.h"
 #include "swift/SIL/FormalLinkage.h"
 #include <functional>
 
@@ -111,10 +113,18 @@
 //===----------------------------------------------------------------------===//
 
 bool SILLinkerVisitor::visitApplyInst(ApplyInst *AI) {
+  bool performFuncDeserialization = false;
+  
+  if (auto sig = AI->getCallee()->getType().castTo<SILFunctionType>()
+                   ->getGenericSignature()) {
+    performFuncDeserialization |= visitApplySubstitutions(
+      sig->getSubstitutionMap(AI->getSubstitutions()));
+  }
+  
   // Ok we have a function ref inst, grab the callee.
   SILFunction *Callee = AI->getReferencedFunction();
   if (!Callee)
-    return false;
+    return performFuncDeserialization;
 
   if (isLinkAll() ||
       hasSharedVisibility(Callee->getLinkage())) {
@@ -122,13 +132,21 @@
     return true;
   }
 
-  return false;
+  return performFuncDeserialization;
 }
 
 bool SILLinkerVisitor::visitPartialApplyInst(PartialApplyInst *PAI) {
+  bool performFuncDeserialization = false;
+  
+  if (auto sig = PAI->getCallee()->getType().castTo<SILFunctionType>()
+                    ->getGenericSignature()) {
+    performFuncDeserialization |= visitApplySubstitutions(
+      sig->getSubstitutionMap(PAI->getSubstitutions()));
+  }
+
   SILFunction *Callee = PAI->getReferencedFunction();
   if (!Callee)
-    return false;
+    return performFuncDeserialization;
 
   if (isLinkAll() ||
       hasSharedVisibility(Callee->getLinkage())) {
@@ -136,7 +154,7 @@
     return true;
   }
 
-  return false;
+  return performFuncDeserialization;
 }
 
 bool SILLinkerVisitor::visitFunctionRefInst(FunctionRefInst *FRI) {
@@ -154,15 +172,36 @@
   return false;
 }
 
+/// Returns true if the root normal conformance of concrete conformance \p c
+/// must be deserialized eagerly: its protocol requires a witness table and it
+/// belongs to a Clang module, so we may not otherwise have a local definition.
+/// Every other conformance can be deferred until devirtualization needs it,
+/// because eagerly reading all used conformances bloats the SIL we read in.
+static bool mustDeserializeProtocolConformance(SILModule &M,
+                                               ProtocolConformanceRef c) {
+  if (!c.isConcrete())
+    return false;
+  auto conformance = c.getConcrete()->getRootNormalConformance();
+  return M.Types.protocolRequiresWitnessTable(conformance->getProtocol())
+    && isa<ClangModuleUnit>(conformance->getDeclContext()
+                                       ->getModuleScopeContext());
+}
+
 bool SILLinkerVisitor::visitProtocolConformance(
     ProtocolConformanceRef ref, const Optional<SILDeclRef> &Member) {
   // If an abstract protocol conformance was passed in, just return false.
   if (ref.isAbstract())
     return false;
+  
+  bool mustDeserialize = mustDeserializeProtocolConformance(Mod, ref);
 
   // Otherwise try and lookup a witness table for C.
   auto C = ref.getConcrete();
-  SILWitnessTable *WT = Mod.lookUpWitnessTable(C);
+  
+  if (!VisitedConformances.insert(C).second)
+    return false;
+  
+  SILWitnessTable *WT = Mod.lookUpWitnessTable(C, true);
 
   // If we don't find any witness table for the conformance, bail and return
   // false.
@@ -170,7 +209,16 @@
     Mod.createWitnessTableDeclaration(
         C, getLinkageForProtocolConformance(
                C->getRootNormalConformance(), NotForDefinition));
-    return false;
+
+    // Adding the declaration may allow us to now deserialize the body.
+    // Force the body if we must deserialize this witness table.
+    if (mustDeserialize) {
+      WT = Mod.lookUpWitnessTable(C, true);
+      assert(WT && WT->isDefinition()
+             && "unable to deserialize witness table when we must?!");
+    } else {
+      return false;
+    }
   }
 
   // If the looked up witness table is a declaration, there is nothing we can
@@ -179,10 +227,23 @@
     return false;
 
   bool performFuncDeserialization = false;
+  
+  auto maybeVisitRelatedConformance = [&](ProtocolConformanceRef c) {
+    // Formally all conformances referenced by a used conformance are used.
+    // However, eagerly visiting them all at this point leads to a large blowup
+    // in the amount of SIL we read in. For optimization purposes we can defer
+    // reading in most conformances until we need them for devirtualization.
+    // However, we *must* pull in shared clang-importer-derived conformances
+    // we potentially use, since we may not otherwise have a local definition.
+    if (mustDeserializeProtocolConformance(Mod, c))
+      performFuncDeserialization |= visitProtocolConformance(c, None);
+  };
+  
   // For each entry in the witness table...
   for (auto &E : WT->getEntries()) {
+    switch (E.getKind()) {
     // If the entry is a witness method...
-    if (E.getKind() == SILWitnessTable::WitnessKind::Method) {
+    case SILWitnessTable::WitnessKind::Method: {
       // And we are only interested in deserializing a specific requirement
       // and don't have that requirement, don't deserialize this method.
       if (Member.hasValue() && E.getMethodWitness().Requirement != *Member)
@@ -197,12 +258,65 @@
       // to deserialize.
       performFuncDeserialization = true;
       addFunctionToWorklist(E.getMethodWitness().Witness);
+      break;
+    }
+    
+    // If the entry is a related witness table, see whether we need to
+    // eagerly deserialize it.
+    case SILWitnessTable::WitnessKind::BaseProtocol: {
+      auto baseConformance = E.getBaseProtocolWitness().Witness;
+      maybeVisitRelatedConformance(ProtocolConformanceRef(baseConformance));
+      break;
+    }
+    case SILWitnessTable::WitnessKind::AssociatedTypeProtocol: {
+      auto assocConformance = E.getAssociatedTypeProtocolWitness().Witness;
+      maybeVisitRelatedConformance(assocConformance);
+      break;
+    }
+    
+    case SILWitnessTable::WitnessKind::AssociatedType:
+    case SILWitnessTable::WitnessKind::Invalid:
+      break;
     }
   }
 
   return performFuncDeserialization;
 }
 
+/// Deserializes the conformances in \p subs that must be available locally.
+/// Returns true if any such visit requested function deserialization.
+bool SILLinkerVisitor::visitApplySubstitutions(const SubstitutionMap &subs) {
+  bool performFuncDeserialization = false;
+  
+  for (auto &reqt : subs.getGenericSignature()->getRequirements()) {
+    switch (reqt.getKind()) {
+    case RequirementKind::Conformance: {
+      auto conformance = subs.lookupConformance(
+          reqt.getFirstType()->getCanonicalType(),
+          cast<ProtocolDecl>(reqt.getSecondType()->getAnyNominal()))
+        .getValue();
+      
+      // Formally all conformances referenced in a function application are
+      // used. However, eagerly visiting them all leads to a large blowup in
+      // the SIL we read in (and we aren't very systematic about laziness), so
+      // most are deferred until devirtualization needs them. We *must* pull in
+      // shared clang-importer-derived conformances: we may lack a local copy.
+      if (mustDeserializeProtocolConformance(Mod, conformance)) {
+        performFuncDeserialization |=
+                                    visitProtocolConformance(conformance, None);
+      }
+      break;
+    }
+    case RequirementKind::Layout:
+    case RequirementKind::SameType:
+    case RequirementKind::Superclass:
+      break;
+    }
+  }
+  
+  return performFuncDeserialization;
+}
+
 bool SILLinkerVisitor::visitInitExistentialAddrInst(
     InitExistentialAddrInst *IEI) {
   // Link in all protocol conformances that this touches.
diff --git a/lib/SIL/Linker.h b/lib/SIL/Linker.h
index cca5839..8cb5a3a 100644
--- a/lib/SIL/Linker.h
+++ b/lib/SIL/Linker.h
@@ -31,6 +31,9 @@
   /// The SILLoader that this visitor is using to link.
   SerializedSILLoader *Loader;
 
+  /// Conformances already visited; breaks cycles in recursive conformances.
+  llvm::DenseSet<ProtocolConformance *> VisitedConformances;
+
   /// Worklist of SILFunctions we are processing.
   llvm::SmallVector<SILFunction *, 128> Worklist;
 
@@ -65,6 +68,7 @@
   bool visitFunctionRefInst(FunctionRefInst *FRI);
   bool visitProtocolConformance(ProtocolConformanceRef C,
                                 const Optional<SILDeclRef> &Member);
+  bool visitApplySubstitutions(const SubstitutionMap &subs);
   bool visitWitnessMethodInst(WitnessMethodInst *WMI) {
     return visitProtocolConformance(WMI->getConformance(), WMI->getMember());
   }
diff --git a/lib/SIL/SILModule.cpp b/lib/SIL/SILModule.cpp
index 40adaf9..4b36e23 100644
--- a/lib/SIL/SILModule.cpp
+++ b/lib/SIL/SILModule.cpp
@@ -22,6 +22,7 @@
 #include "Linker.h"
 #include "swift/SIL/SILVisitor.h"
 #include "swift/SIL/SILValue.h"
+#include "swift/ClangImporter/ClangModule.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -774,6 +775,23 @@
   }
 }
 
+bool SILModule::
+shouldSerializeEntitiesAssociatedWithDeclContext(const DeclContext *DC) const {
+  // Serialize entities declared within this module's associated context.
+  if (DC->isChildContextOf(getAssociatedContext())) {
+    return true;
+  }
+  
+  // Also serialize entities that belong to Clang modules, since other
+  // entities may depend on them, and whoever deserializes those entities may
+  // not have their own copy.
+  if (isa<ClangModuleUnit>(DC->getModuleScopeContext())) {
+    return true;
+  }
+  
+  return false;
+}
+
 /// Returns true if it is the OnoneSupport module.
 bool SILModule::isOnoneSupportModule() const {
   return getSwiftModule()->getName().str() == SWIFT_ONONE_SUPPORT;
diff --git a/lib/Serialization/Deserialization.cpp b/lib/Serialization/Deserialization.cpp
index 599b498..896131a 100644
--- a/lib/Serialization/Deserialization.cpp
+++ b/lib/Serialization/Deserialization.cpp
@@ -625,6 +625,8 @@
 
   ASTContext &ctx = getContext();
   DeclContext *dc = getDeclContext(contextID);
+  assert(!isa<ClangModuleUnit>(dc->getModuleScopeContext())
+         && "should not have serialized a conformance from a clang module");
   Type conformingType = dc->getDeclaredInterfaceType();
   PrettyStackTraceType trace(ctx, "reading conformance for", conformingType);
 
diff --git a/lib/Serialization/DeserializeSIL.cpp b/lib/Serialization/DeserializeSIL.cpp
index fb3cb8e..915bb6e 100644
--- a/lib/Serialization/DeserializeSIL.cpp
+++ b/lib/Serialization/DeserializeSIL.cpp
@@ -2814,6 +2814,14 @@
     if (Callback)
       Callback->didDeserialize(MF->getAssociatedModule(), wT);
   }
+  
+  // We may see multiple shared-linkage definitions of the same witness table
+  // for the same conformance.
+  if (wT->isDefinition() && hasSharedVisibility(*Linkage)
+      && hasSharedVisibility(wT->getLinkage())) {
+    wTableOrOffset.set(wT, /*fully deserialized*/ true);
+    return wT;
+  }
 
   assert(wT->isDeclaration() && "Our witness table at this point must be a "
                                 "declaration.");
diff --git a/lib/Serialization/Serialization.cpp b/lib/Serialization/Serialization.cpp
index fa7b05b..8162e22 100644
--- a/lib/Serialization/Serialization.cpp
+++ b/lib/Serialization/Serialization.cpp
@@ -1608,7 +1608,9 @@
   switch (conformance->getKind()) {
   case ProtocolConformanceKind::Normal: {
     auto normal = cast<NormalProtocolConformance>(conformance);
-    if (!isDeclXRef(getDeclForContext(normal->getDeclContext()))) {
+    if (!isDeclXRef(getDeclForContext(normal->getDeclContext()))
+        && !isa<ClangModuleUnit>(normal->getDeclContext()
+                                       ->getModuleScopeContext())) {
       // A normal conformance in this module file.
       unsigned abbrCode = abbrCodes[NormalProtocolConformanceIdLayout::Code];
       NormalProtocolConformanceIdLayout::emitRecord(Out, ScratchRecord,
diff --git a/lib/Serialization/SerializeSIL.cpp b/lib/Serialization/SerializeSIL.cpp
index 5debc36..5c670b8 100644
--- a/lib/Serialization/SerializeSIL.cpp
+++ b/lib/Serialization/SerializeSIL.cpp
@@ -2454,21 +2454,23 @@
   assert(assocDC && "cannot serialize SIL without an associated DeclContext");
   for (const SILVTable &vt : SILMod->getVTables()) {
     if ((ShouldSerializeAll || vt.isSerialized()) &&
-        vt.getClass()->isChildContextOf(assocDC))
+        SILMod->shouldSerializeEntitiesAssociatedWithDeclContext(vt.getClass()))
       writeSILVTable(vt);
   }
   
   // Write out property descriptors.
   for (const SILProperty &prop : SILMod->getPropertyList()) {
     if ((ShouldSerializeAll || prop.isSerialized()) &&
-        prop.getDecl()->getInnermostDeclContext()->isChildContextOf(assocDC))
+        SILMod->shouldSerializeEntitiesAssociatedWithDeclContext(
+                                     prop.getDecl()->getInnermostDeclContext()))
       writeSILProperty(prop);
   }
 
   // Write out fragile WitnessTables.
   for (const SILWitnessTable &wt : SILMod->getWitnessTables()) {
     if ((ShouldSerializeAll || wt.isSerialized()) &&
-        wt.getConformance()->getDeclContext()->isChildContextOf(assocDC))
+        SILMod->shouldSerializeEntitiesAssociatedWithDeclContext(
+                                         wt.getConformance()->getDeclContext()))
       writeSILWitnessTable(wt);
   }
 
@@ -2476,7 +2478,8 @@
   for (const SILDefaultWitnessTable &wt : SILMod->getDefaultWitnessTables()) {
     // FIXME: Don't need to serialize private and internal default witness
     // tables.
-    if (wt.getProtocol()->getDeclContext()->isChildContextOf(assocDC))
+    if (SILMod->shouldSerializeEntitiesAssociatedWithDeclContext(
+                                                              wt.getProtocol()))
       writeSILDefaultWitnessTable(wt);
   }
 
diff --git a/test/IRGen/Inputs/deserialize-clang-importer-witness-tables/regex.swift b/test/IRGen/Inputs/deserialize-clang-importer-witness-tables/regex.swift
new file mode 100644
index 0000000..ce8c929
--- /dev/null
+++ b/test/IRGen/Inputs/deserialize-clang-importer-witness-tables/regex.swift
@@ -0,0 +1,25 @@
+
+import Foundation
+
+public struct RegEx {
+    public let pattern: String
+    fileprivate let regex: NSRegularExpression
+    public typealias Options = NSRegularExpression.Options
+    
+    public init(pattern: String, options: Options = []) throws {
+        self.pattern = pattern
+        self.regex = try NSRegularExpression(pattern: pattern, options: options)
+    }
+    
+    /// Returns the capture groups (1 and up) of the first match, with "" for any group whose range is NSNotFound, or nil if there was no match.
+    public func firstMatch(in string: String) -> [String]? {
+        let nsString = string as NSString
+        
+        return regex.firstMatch(in: string, range: NSMakeRange(0, nsString.length)).map { match -> [String] in
+            return (1 ..< match.numberOfRanges).map { idx -> String in
+                let range = match.range(at: idx)
+                return range.location == NSNotFound ? "" : nsString.substring(with: range)
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/IRGen/deserialize-clang-importer-witness-tables.swift b/test/IRGen/deserialize-clang-importer-witness-tables.swift
new file mode 100644
index 0000000..9e66fbc
--- /dev/null
+++ b/test/IRGen/deserialize-clang-importer-witness-tables.swift
@@ -0,0 +1,16 @@
+// RUN: %empty-directory(%t)
+// RUN: %target-swift-frontend -swift-version 4 -emit-module -o %t/regex.swiftmodule %S/Inputs/deserialize-clang-importer-witness-tables/regex.swift
+// RUN: %target-swift-frontend -swift-version 4 -emit-ir %s -I %t | %FileCheck %s
+// REQUIRES: objc_interop
+import regex
+
+public func foo(line: String) {
+  // The NSRegularExpressionOptions: SetAlgebra conformance is used indirectly
+  // from the default argument expression passed to `RegEx(pattern:options:)`
+  // below. Ensure that a local copy of the definition was deserialized
+  // and lowered to IR.
+  // CHECK-LABEL: define {{.*}} i8** @"$SSo26NSRegularExpressionOptionsVs10SetAlgebraSCWa"
+  // CHECK-LABEL: define {{.*}} void @"$SSo26NSRegularExpressionOptionsVs10SetAlgebraSCsACPxycfCTW"
+  let versionRegex = try! RegEx(pattern: "Apple")
+  _ = versionRegex.firstMatch(in: line)  
+}
\ No newline at end of file
diff --git a/test/SIL/Serialization/Inputs/clang_conformances.sil b/test/SIL/Serialization/Inputs/clang_conformances.sil
index 98453cf..bd4ff2a 100644
--- a/test/SIL/Serialization/Inputs/clang_conformances.sil
+++ b/test/SIL/Serialization/Inputs/clang_conformances.sil
@@ -20,3 +20,11 @@
   dealloc_stack %1 : $*ComparisonResult // id: %9
   return %7 : $Bool                               // id: %10
 }
+
+sil @compare_eq : $@convention(witness_method: Equatable) (@in_guaranteed ComparisonResult, @in_guaranteed ComparisonResult, @thick ComparisonResult.Type) -> Bool
+
+sil_witness_table shared [serialized] ComparisonResult: Equatable module __ObjC {
+  method #Equatable."=="!1: @compare_eq
+}
+
+