lib/SILOptimizer/Mandatory/Differentiation.cpp - third_party/swift - Git at Google

 //===--- Differentiation.cpp - SIL Automatic Differentiation --*- C++ -*---===//
 //
 // This source file is part of the Swift.org open source project
 //
 // Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
 // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
 //
 //===----------------------------------------------------------------------===//
 //
 // SWIFT_ENABLE_TENSORFLOW
 //
 // This file implements automatic differentiation.
 //
 // NOTE: Although the AD feature is developed as part of the Swift for
 // TensorFlow project, it is completely independent from TensorFlow support.
 //
 //===----------------------------------------------------------------------===//

 #define DEBUG_TYPE "differentiation"

 #include "Differentiation.h"
 #include "swift/AST/ASTMangler.h"
 #include "swift/AST/ASTPrinter.h"
 #include "swift/AST/AnyFunctionRef.h"
 #include "swift/AST/AutoDiff.h"
 #include "swift/AST/Builtins.h"
 #include "swift/AST/DeclContext.h"
 #include "swift/AST/DiagnosticsSIL.h"
 #include "swift/AST/Expr.h"
 #include "swift/AST/GenericEnvironment.h"
 #include "swift/AST/GenericSignatureBuilder.h"
 #include "swift/AST/SourceFile.h"
 #include "swift/AST/ParameterList.h"
 #include "swift/AST/SubstitutionMap.h"
 #include "swift/AST/TypeCheckRequests.h"
 #include "swift/SIL/FormalLinkage.h"
 #include "swift/SIL/LoopInfo.h"
 #include "swift/SIL/Projection.h"
 #include "swift/SIL/SILBuilder.h"
 #include "swift/SIL/TypeSubstCloner.h"
 #include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
 #include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
 #include "swift/SILOptimizer/PassManager/Passes.h"
 #include "swift/SILOptimizer/PassManager/Transforms.h"
 #include "swift/SILOptimizer/Utils/LoopUtils.h"
 #include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/BreadthFirstIterator.h"
 #include "llvm/ADT/DenseSet.h"

 using namespace swift;
 using llvm::DenseMap;
 using llvm::SmallDenseMap;
 using llvm::SmallDenseSet;
 using llvm::SmallMapVector;
 using llvm::SmallSet;

 /// This flag is used to disable `differentiable_function_extract` instruction
 /// folding for SIL testing purposes.
 ///
 /// Command-line spelling:
 /// `-differentiation-skip-folding-differentiable-function-extraction`.
 /// The option's initial value is `true`.
 static llvm::cl::opt<bool> SkipFoldingDifferentiableFunctionExtraction(
     "differentiation-skip-folding-differentiable-function-extraction",
     llvm::cl::init(true));

 //===----------------------------------------------------------------------===//
 // Helpers
 //===----------------------------------------------------------------------===//

 /// Writes the "[AD] " tag to `llvm::dbgs()` and hands back that same stream,
 /// so callers can append a short debug message from within the AD pass.
 static raw_ostream &getADDebugStream() {
   auto &stream = llvm::dbgs();
   stream << "[AD] ";
   return stream;
 }

 /// Given a dumpable value, dumps it to `llvm::dbgs()`.
 template <typename T> static inline void debugDump(T &v) {
   LLVM_DEBUG(llvm::dbgs() << "\n==== BEGIN DEBUG DUMP ====\n"
                           << v << "\n==== END DEBUG DUMP ====\n");
 }

 /// Returns true if `v` is a `function_ref` whose referenced function carries
 /// the "autodiff.nonvarying" semantics attribute.
 ///
 /// Returns false for any other value, including a `function_ref` for which
 /// `getReferencedFunctionOrNull()` yields null.
 static bool isWithoutDerivative(SILValue v) {
   auto *fnRef = dyn_cast<FunctionRefInst>(v);
   if (!fnRef)
     return false;
   // `getReferencedFunctionOrNull` may return null; guard before dereferencing.
   auto *referencedFn = fnRef->getReferencedFunctionOrNull();
   if (!referencedFn)
     return false;
   return referencedFn->hasSemanticsAttr("autodiff.nonvarying");
 }

 /// Returns true if the given `apply` has the
 /// "array.uninitialized_intrinsic" semantics.
 static bool isArrayLiteralIntrinsic(ApplyInst *ai) {
   const bool hasIntrinsicSemantics =
       ai->hasSemantics("array.uninitialized_intrinsic");
   return hasIntrinsicSemantics;
 }

 /// If `v` is an `apply` for which `isArrayLiteralIntrinsic` holds, returns that
 /// `apply`. Otherwise returns null.
 static ApplyInst *getAllocateUninitializedArrayIntrinsic(SILValue v) {
   auto *candidate = dyn_cast<ApplyInst>(v);
   if (!candidate || !isArrayLiteralIntrinsic(candidate))
     return nullptr;
   return candidate;
 }

 /// Looks for the `destructure_tuple` user of a tuple-typed value.
 ///
 /// Returns null when the value is not tuple-typed or has no
 /// `destructure_tuple` user. Asserts that at most one such user exists.
 static DestructureTupleInst *getSingleDestructureTupleUser(SILValue value) {
   if (!value->getType().is<TupleType>())
     return nullptr;
   DestructureTupleInst *singleUser = nullptr;
   for (auto *use : value->getUses()) {
     auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
     if (!dti)
       continue;
     // A previously recorded user means there is more than one.
     assert(!singleUser &&
            "There should only be one `destructure_tuple` user of a tuple");
     singleUser = dti;
   }
   return singleUser;
 }

 /// Invokes `resultCallback` on each direct result of the given `apply`.
 ///
 /// - If the `apply` is not tuple-typed, the callback receives the `apply`
 ///   itself.
 /// - If it is tuple-typed and has a single `destructure_tuple` user, the
 ///   callback receives each result of that user.
 /// - If it is tuple-typed with no such user, the callback is never invoked.
 static void forEachApplyDirectResult(
     ApplyInst *ai, llvm::function_ref<void(SILValue)> resultCallback) {
   if (ai->getType().is<TupleType>()) {
     auto *destructure = getSingleDestructureTupleUser(ai);
     if (!destructure)
       return;
     for (auto element : destructure->getResults())
       resultCallback(element);
     return;
   }
   resultCallback(ai);
 }

 /// Appends all formal results of `function` (direct and indirect) to
 /// `results`, ordered as in the function's result conventions. "Formal
 /// results" are the result values in the body of the function, not the values
 /// seen at call sites.
 ///
 /// Direct results are taken from the operand of the `return` terminator: the
 /// elements of its defining `tuple` instruction if there is one, otherwise the
 /// operand itself.
 static void
 collectAllFormalResultsInTypeOrder(SILFunction &function,
                                    SmallVectorImpl<SILValue> &results) {
   SILFunctionConventions convs(function.getLoweredFunctionType(),
                                function.getModule());
   auto indirectResults = function.getIndirectResults();
   auto *returnInst =
       cast<ReturnInst>(function.findReturnBB()->getTerminator());
   auto returnedValue = returnInst->getOperand();
   // Gather the direct results from the returned value.
   SmallVector<SILValue, 8> directResults;
   auto *returnedTuple =
       dyn_cast_or_null<TupleInst>(returnedValue->getDefiningInstruction());
   if (returnedTuple) {
     for (auto element : returnedTuple->getElements())
       directResults.push_back(element);
   } else {
     directResults.push_back(returnedValue);
   }
   // Interleave direct and indirect results following the result conventions.
   unsigned nextIndirect = 0, nextDirect = 0;
   for (auto &resInfo : convs.getResults()) {
     if (resInfo.isFormalDirect())
       results.push_back(directResults[nextDirect++]);
     else
       results.push_back(indirectResults[nextIndirect++]);
   }
 }

 /// Appends the direct results of `function` to `results`. The results are the
 /// values in the body of the function, not the values seen at call sites.
 ///
 /// The results are read from the operand of the function's `return`
 /// terminator: if that operand is a `tuple` instruction, its elements are
 /// appended in order; otherwise the operand itself is appended.
 static void
 collectAllDirectResultsInTypeOrder(SILFunction &function,
                                    SmallVectorImpl<SILValue> &results) {
   auto *retInst = cast<ReturnInst>(function.findReturnBB()->getTerminator());
   auto retVal = retInst->getOperand();
   if (auto *tupleInst = dyn_cast<TupleInst>(retVal))
     results.append(tupleInst->getElements().begin(),
                    tupleInst->getElements().end());
   else
     results.push_back(retVal);
 }

 /// Appends all actual results of the call site `ai` (direct and indirect) to
 /// `results`, ordered as in the substituted callee's result conventions.
 ///
 /// Direct results are consumed in order from `extractedDirectResults`;
 /// indirect results are consumed in order from the `apply`'s indirect SIL
 /// results.
 static void collectAllActualResultsInTypeOrder(
     ApplyInst *ai, ArrayRef<SILValue> extractedDirectResults,
     SmallVectorImpl<SILValue> &results) {
   auto calleeConvs = ai->getSubstCalleeConv();
   unsigned nextIndirect = 0;
   unsigned nextDirect = 0;
   for (auto &resInfo : calleeConvs.getResults()) {
     if (resInfo.isFormalDirect())
       results.push_back(extractedDirectResults[nextDirect++]);
     else
       results.push_back(ai->getIndirectSILResults()[nextIndirect++]);
   }
 }

 /// Joins a range of types into a single type: a range with exactly one
 /// element yields that element, any other range yields a tuple type made of
 /// all the elements.
 template <typename TypeRange>
 static CanType joinElementTypes(TypeRange &&range, const ASTContext &ctx) {
   if (range.size() != 1) {
     auto tupleElements = map<SmallVector<TupleTypeElt, 8>>(
         range, [&](Type eltType) { return eltType; });
     return TupleType::get(tupleElements, ctx);
   }
   return range.front();
 }

 /// Given a range of SIL values, retrieves the AST types of these values and
 /// joins them into a single canonical type.
 ///
 /// If the range has exactly one value, returns that value's AST type.
 /// Otherwise returns the canonical tuple type whose elements are the AST types
 /// of all values, in range order.
 template <typename SILValueRange>
 static CanType joinElementTypesFromValues(SILValueRange &&range,
                                           const ASTContext &ctx) {
   if (range.size() == 1)
     return range.front()->getType().getASTType();
   SmallVector<TupleTypeElt, 8> elts;
   elts.reserve(range.size());
   // Append each element: writing through `elts.begin()` of an empty vector
   // would neither grow the vector nor stay within its storage.
   for (SILValue val : range)
     elts.push_back(TupleTypeElt(val->getType().getASTType()));
   return TupleType::get(elts, ctx)->getCanonicalType();
 }

 /// Looks up an operator requirement, such as '+', in the given protocol.
 ///
 /// Returns the first directly-declared protocol requirement with the given
 /// name that is a static operator function, or null if there is none.
 static FuncDecl *findOperatorDeclInProtocol(DeclName operatorName,
                                             ProtocolDecl *protocol) {
   assert(operatorName.isOperator());
   for (auto *candidate : protocol->lookupDirect(operatorName)) {
     // Only protocol requirements qualify.
     if (!candidate->isProtocolRequirement())
       continue;
     auto *funcDecl = dyn_cast<FuncDecl>(candidate);
     if (funcDecl && funcDecl->isStatic() && funcDecl->isOperator())
       return funcDecl;
   }
   // No matching operator requirement.
   return nullptr;
 }

 /// Returns the "constrained" derivative generic signature given:
 /// - An original SIL function type.
 /// - A wrt parameter index subset.
 /// - A possibly uncanonical derivative generic signature (optional).
 ///
 /// When no derivative generic signature is given, the original function
 /// type's generic signature is used; if that is also null, null is returned.
 /// The constrained derivative generic signature constrains all wrt parameters
 /// to conform to `Differentiable`.
 static GenericSignature getConstrainedDerivativeGenericSignature(
     CanSILFunctionType originalFnTy, IndexSubset *paramIndexSet,
     GenericSignature derivativeGenSig) {
   // Fall back to the original function's generic signature if none is given.
   if (!derivativeGenSig) {
     derivativeGenSig = originalFnTy->getGenericSignature();
     if (!derivativeGenSig)
       return nullptr;
   }
   auto &astCtx = derivativeGenSig->getASTContext();
   auto *differentiableProto =
       astCtx.getProtocol(KnownProtocolKind::Differentiable);
   // Build one `Differentiable` conformance requirement per wrt parameter.
   SmallVector<Requirement, 4> conformanceReqs;
   for (unsigned wrtIdx : paramIndexSet->getIndices()) {
     auto wrtParamType = originalFnTy->getParameters()[wrtIdx].getType();
     conformanceReqs.push_back(
         Requirement(RequirementKind::Conformance, wrtParamType,
                     differentiableProto->getDeclaredType()));
   }
   return evaluateOrDefault(
       astCtx.evaluator,
       AbstractGenericSignatureRequest{
           derivativeGenSig.getPointer(),
           /*addedGenericParams*/ {},
           std::move(conformanceReqs)},
       nullptr);
 }

 /// Returns the canonical derivative generic signature for the given
 /// `[differentiable]` attribute and original function:
 /// - the canonical form of the attribute's derivative generic signature, if
 ///   the attribute has one;
 /// - otherwise, the generic signature of the original function's lowered
 ///   function type.
 static CanGenericSignature getDerivativeGenericSignature(
     SILDifferentiableAttr *attr, SILFunction *original) {
   auto attrDerivativeGenSig = attr->getDerivativeGenericSignature();
   if (!attrDerivativeGenSig)
     return original->getLoweredFunctionType()->getGenericSignature();
   return attrDerivativeGenSig->getCanonicalSignature();
 }

 /// Clones the generic parameters of the given generic signature into a new
 /// `GenericParamList`.
 ///
 /// Each clone keeps the name, depth and index of the parameter it copies, is
 /// placed in the declaration context `dc`, and is marked implicit.
 static GenericParamList *cloneGenericParameters(ASTContext &ctx,
                                                 DeclContext *dc,
                                                 CanGenericSignature sig) {
   SmallVector<GenericTypeParamDecl *, 2> clones;
   for (auto paramType : sig->getGenericParams()) {
     auto *clone = new (ctx) GenericTypeParamDecl(
         dc, paramType->getName(), SourceLoc(), paramType->getDepth(),
         paramType->getIndex());
     clone->setDeclContext(dc);
     clone->setImplicit(true);
     clones.push_back(clone);
   }
   auto *clonedList =
       GenericParamList::create(ctx, SourceLoc(), clones, SourceLoc());
   return clonedList;
 }

 /// Given a `differentiable_function` instruction, returns the
 /// `DifferentiableFunctionExpr` AST node attached to the instruction's
 /// location, or nullptr if the location carries no such node.
 static DifferentiableFunctionExpr *
 findDifferentialOperator(DifferentiableFunctionInst *inst) {
   auto instLoc = inst->getLoc();
   return instLoc.getAsASTNode<DifferentiableFunctionExpr>();
 }

 /// Returns the underlying `Inst` for the given SILValue, if it exists, peering
 /// through function conversion instructions.
 ///
 /// The instructions looked through are `thin_to_thick_function` (operand),
 /// `convert_function` (operand) and `partial_apply` (callee). Returns nullptr
 /// as soon as the current value is neither an `Inst` nor one of those.
 template<class Inst>
 static Inst *peerThroughFunctionConversions(SILValue value) {
   while (true) {
     if (auto *inst = dyn_cast<Inst>(value))
       return inst;
     if (auto *thinToThick = dyn_cast<ThinToThickFunctionInst>(value)) {
       value = thinToThick->getOperand();
       continue;
     }
     if (auto *convertFn = dyn_cast<ConvertFunctionInst>(value)) {
       value = convertFn->getOperand();
       continue;
     }
     if (auto *partialApply = dyn_cast<PartialApplyInst>(value)) {
       value = partialApply->getCallee();
       continue;
     }
     return nullptr;
   }
 }

 //===----------------------------------------------------------------------===//
 // Auxiliary data structures
 //===----------------------------------------------------------------------===//

 namespace {
 class ADContext;

 /// The invoker of a differentiation task. It can be some user syntax, e.g.
 /// a `differentiable_function` instruction lowered from a
 /// `DifferentiableFunctionExpr` expression, the differentiation pass, or
 /// nothing at all. This will be used to emit informative diagnostics.
 ///
 /// The struct is a tagged union: `kind` selects which member of `value` is
 /// active, and each accessor asserts that the matching kind is set.
 struct DifferentiationInvoker {
 public:
   /// The kind of the invoker of a differentiation task.
   enum class Kind {
     // Invoked by a `differentiable_function` instruction, which may or may not
     // be linked to a Swift AST node (e.g. a `DifferentiableFunctionExpr`
     // expression).
     DifferentiableFunctionInst,

     // Invoked by the indirect application of differentiation. This case has an
     // associated original `apply` instruction and `[differentiable]` attribute.
     IndirectDifferentiation,

     // Invoked by a `[differentiable]` attribute in SIL **without** being linked
     // to a Swift AST attribute. This case has an associated `[differentiable]`
     // attribute.
     SILDifferentiableAttribute
   };

 private:
   /// Discriminator selecting the active member of `value`.
   Kind kind;
   union Value {
     /// The instruction associated with the `DifferentiableFunctionInst` case.
     DifferentiableFunctionInst *diffFuncInst;
     Value(DifferentiableFunctionInst *inst) : diffFuncInst(inst) {}

     /// The parent `apply` instruction and `[differentiable]` attribute
     /// associated with the `IndirectDifferentiation` case.
     std::pair<ApplyInst *, SILDifferentiableAttr *>
         indirectDifferentiation;
     Value(ApplyInst *applyInst, SILDifferentiableAttr *attr)
         : indirectDifferentiation({applyInst, attr}) {}

     /// The `[differentiable]` attribute associated with the
     /// `SILDifferentiableAttribute` case.
     SILDifferentiableAttr *silDifferentiableAttribute;
     Value(SILDifferentiableAttr *attr) : silDifferentiableAttribute(attr) {}
   } value;

   /*implicit*/
   DifferentiationInvoker(Kind kind, Value value) : kind(kind), value(value) {}

 public:
   /// Creates an invoker of kind `DifferentiableFunctionInst`.
   DifferentiationInvoker(DifferentiableFunctionInst *inst)
       : kind(Kind::DifferentiableFunctionInst), value(inst) {}
   /// Creates an invoker of kind `IndirectDifferentiation`.
   DifferentiationInvoker(ApplyInst *applyInst, SILDifferentiableAttr *attr)
       : kind(Kind::IndirectDifferentiation),
         value({applyInst, attr}) {}
   /// Creates an invoker of kind `SILDifferentiableAttribute`.
   DifferentiationInvoker(SILDifferentiableAttr *attr)
       : kind(Kind::SILDifferentiableAttribute), value(attr) {}

   Kind getKind() const { return kind; }

   /// Returns the associated instruction. Requires kind
   /// `DifferentiableFunctionInst`.
   DifferentiableFunctionInst *getDifferentiableFunctionInst() const {
     assert(kind == Kind::DifferentiableFunctionInst);
     return value.diffFuncInst;
   }

   /// Returns the associated `apply` instruction and `[differentiable]`
   /// attribute. Requires kind `IndirectDifferentiation`.
   std::pair<ApplyInst *, SILDifferentiableAttr *>
   getIndirectDifferentiation() const {
     assert(kind == Kind::IndirectDifferentiation);
     return value.indirectDifferentiation;
   }

   /// Returns the associated `[differentiable]` attribute. Requires kind
   /// `SILDifferentiableAttribute`.
   SILDifferentiableAttr *getSILDifferentiableAttribute() const {
     assert(kind == Kind::SILDifferentiableAttribute);
     return value.silDifferentiableAttribute;
   }

   /// Returns the source location of the invoker: the location of the
   /// associated instruction, of the associated `apply`, or of the attribute's
   /// original function, depending on the kind.
   SourceLoc getLocation() const {
     switch (kind) {
     case Kind::DifferentiableFunctionInst:
       return getDifferentiableFunctionInst()->getLoc().getSourceLoc();
     case Kind::IndirectDifferentiation:
       return getIndirectDifferentiation().first->getLoc().getSourceLoc();
     case Kind::SILDifferentiableAttribute:
       return getSILDifferentiableAttribute()->getOriginal()
           ->getLocation().getSourceLoc();
     }
     // Every enumerator returns above; without this, control would fall off
     // the end of a non-void function.
     llvm_unreachable("Invalid differentiation invoker kind");
   }

   void print(llvm::raw_ostream &os) const;
 };

 class DifferentiableActivityInfo;

 /// Information about the JVP/VJP function produced during JVP/VJP generation,
 /// e.g. mappings from original values to corresponding values in the
 /// pullback/differential struct.
 ///
 /// A linear map struct is an aggregate value containing linear maps checkpointed
 /// during the JVP/VJP computation. Linear map structs are generated for every
 /// original function during JVP/VJP generation. Linear map struct values are
 /// constructed by JVP/VJP functions and consumed by pullback/differential
 /// functions.
 class LinearMapInfo {
 private:
   /// The linear map kind.
   AutoDiffLinearMapKind kind;

   /// The original function.
   SILFunction *const original;

   /// The derivative function.
   SILFunction *const derivative;

   /// Activity info of the original function.
   const DifferentiableActivityInfo &activityInfo;

   /// Differentiation indices of the function.
   const SILAutoDiffIndices &indices;

   /// Mapping from original basic blocks to linear map structs.
   DenseMap<SILBasicBlock *, StructDecl *> linearMapStructs;

   /// Mapping from original basic blocks to branching trace enums.
   /// For pullbacks: these are predecessor enums.
   /// For differentials: these are successor enums.
   DenseMap<SILBasicBlock *, EnumDecl *> branchingTraceDecls;

   /// Mapping from `apply` and `struct_extract` instructions in the original
   /// function to the corresponding linear map declaration in the linear map
   /// struct.
   DenseMap<SILInstruction *, VarDecl *> linearMapValueMap;

   /// Mapping from predecessor+succcessor basic block pairs in original function
   /// to the corresponding branching trace enum case.
   DenseMap<std::pair<SILBasicBlock *, SILBasicBlock *>, EnumElementDecl *>
       branchingTraceEnumCases;

   /// Mapping from linear map structs to their branching trace enum fields.
   DenseMap<StructDecl *, VarDecl *> linearMapStructEnumFields;

   /// A type converter, used to compute struct/enum SIL types.
   Lowering::TypeConverter &typeConverter;

 private:
   /// Remaps the given type into the derivative function's context.
   SILType remapTypeInDerivative(SILType ty) {
     if (ty.hasArchetype())
       return derivative->mapTypeIntoContext(ty.mapTypeOutOfContext());
     return derivative->mapTypeIntoContext(ty);
   }

   /// Adds a `VarDecl` member with the given name and type to the given nominal
   /// declaration.
   VarDecl *addVarDecl(NominalTypeDecl *nominal, StringRef name, Type type) {
     auto &astCtx = nominal->getASTContext();
     auto id = astCtx.getIdentifier(name);
     auto *varDecl = new (astCtx) VarDecl(
         /*IsStatic*/ false, VarDecl::Introducer::Var, /*IsCaptureList*/ false,
         SourceLoc(), id, nominal);
     varDecl->setAccess(nominal->getEffectiveAccess());
     if (type->hasArchetype())
       varDecl->setInterfaceType(type->mapTypeOutOfContext());
     else
       varDecl->setInterfaceType(type);
     nominal->addMember(varDecl);
     return varDecl;
   }

   /// Retrieves the file unit that contains implicit declarations in the
   /// current Swift module. If it does not exist, create one.
   ///
   // FIXME: Currently it defaults to the file containing `original`, if it can
   // be determined. Otherwise, it defaults to any file unit in the module. To
   // handle this more properly, we could revive the DerivedFileUnit class to
   // contain all synthesized implicit type declarations.
   SourceFile &getDeclarationFileUnit() {
     if (original->hasLocation())
       if (auto *declContext = original->getLocation().getAsDeclContext())
         if (auto *parentSourceFile = declContext->getParentSourceFile())
           return *parentSourceFile;
     for (auto *file : original->getModule().getSwiftModule()->getFiles())
       if (auto *src = dyn_cast<SourceFile>(file))
         return *src;
     llvm_unreachable("No files?");
   }

   /// Compute and set the access level for the given nominal type, given the
   /// original function linkage.
   void computeAccessLevel(
       NominalTypeDecl *nominal, SILLinkage originalLinkage) {
     auto &astCtx = nominal->getASTContext();
     switch (originalLinkage) {
     case swift::SILLinkage::Public:
     case swift::SILLinkage::PublicNonABI:
       nominal->setAccess(AccessLevel::Internal);
       nominal->getAttrs().add(
           new (astCtx) UsableFromInlineAttr(/*Implicit*/ true));
       break;
     case swift::SILLinkage::Hidden:
     case swift::SILLinkage::Shared:
       nominal->setAccess(AccessLevel::Internal);
       break;
     case swift::SILLinkage::Private:
       nominal->setAccess(AccessLevel::FilePrivate);
       break;
     default:
       // When the original function has external linkage, we create an internal
       // struct for use by our own module. This is necessary for cross-cell
       // differentiation in Jupyter.
       // TODO: Add a test in the compiler that exercises a similar situation as
       // cross-cell differentiation in Jupyter.
       nominal->setAccess(AccessLevel::Internal);
     }
   }

   /// Creates an enum declaration with the given JVP/VJP generic signature,
   /// whose cases represent the predecessors/successors of the given original
   /// block.
   EnumDecl *createBranchingTraceDecl(SILBasicBlock *originalBB,
                                      SILAutoDiffIndices indices,
                                      CanGenericSignature genericSig,
                                      SILLoopInfo *loopInfo) {
     assert(originalBB->getParent() == original);
     auto &astCtx = original->getASTContext();
     auto *moduleDecl = original->getModule().getSwiftModule();
     auto &file = getDeclarationFileUnit();
     // Create a branching trace enum.
     std::string enumName;
     switch (kind) {
     case AutoDiffLinearMapKind::Differential:
       enumName =
           "_AD__" + original->getName().str() +
           "_bb" + std::to_string(originalBB->getDebugID()) +
           "__Succ__" + indices.mangle();
       break;
     case AutoDiffLinearMapKind::Pullback:
       enumName =
           "_AD__" + original->getName().str() +
           "_bb" + std::to_string(originalBB->getDebugID()) +
           "__Pred__" + indices.mangle();
       break;
     }
     auto enumId = astCtx.getIdentifier(enumName);
     auto loc = original->getLocation().getSourceLoc();
     GenericParamList *genericParams = nullptr;
     if (genericSig)
       genericParams = cloneGenericParameters(astCtx, &file, genericSig);
     auto *branchingTraceDecl = new (astCtx) EnumDecl(
         /*EnumLoc*/ SourceLoc(), /*Name*/ enumId, /*NameLoc*/ loc,
         /*Inherited*/ {}, /*GenericParams*/ genericParams, /*DC*/ &file);
     // Note: must mark enum as implicit to satisfy assertion in
     // `Parser::parseDeclListDelayed`.
     branchingTraceDecl->setImplicit();
     if (genericSig)
       branchingTraceDecl->setGenericSignature(genericSig);
     computeAccessLevel(branchingTraceDecl,
                        original->getEffectiveSymbolLinkage());
     branchingTraceDecl->computeType();
     assert(branchingTraceDecl->hasInterfaceType());
     file.addVisibleDecl(branchingTraceDecl);
     // Add basic block enum cases.
     for (auto *predBB : originalBB->getPredecessorBlocks()) {
       auto bbId = "bb" + std::to_string(predBB->getDebugID());
       auto *linearMapStruct = getLinearMapStruct(predBB);
       assert(linearMapStruct);
       auto linearMapStructTy =
           linearMapStruct->getDeclaredInterfaceType()->getCanonicalType();
       // Create dummy declaration representing enum case parameter.
       auto *decl = new (astCtx)
           ParamDecl(ParamDecl::Specifier::Default, loc, loc, Identifier(), loc,
                     Identifier(), moduleDecl);
       if (linearMapStructTy->hasArchetype())
         decl->setInterfaceType(linearMapStructTy->mapTypeOutOfContext());
       else
         decl->setInterfaceType(linearMapStructTy);
       // Create enum element and enum case declarations.
       auto *paramList = ParameterList::create(astCtx, {decl});
       auto *enumEltDecl = new (astCtx) EnumElementDecl(
           /*IdentifierLoc*/ loc, DeclName(astCtx.getIdentifier(bbId)),
           paramList, loc, /*RawValueExpr*/ nullptr, branchingTraceDecl);
       enumEltDecl->setImplicit();
       enumEltDecl->computeType();
       auto *enumCaseDecl = EnumCaseDecl::create(
           /*CaseLoc*/ loc, {enumEltDecl}, branchingTraceDecl);
       enumCaseDecl->setImplicit();
       branchingTraceDecl->addMember(enumEltDecl);
       branchingTraceDecl->addMember(enumCaseDecl);
       // Record enum element declaration.
       branchingTraceEnumCases.insert({{predBB, originalBB}, enumEltDecl});
     }
     // If original block is in a loop, mark branching trace enum as indirect.
     if (loopInfo->getLoopFor(originalBB))
       branchingTraceDecl->getAttrs().add(
           new (astCtx) IndirectAttr(/*Implicit*/ true));
     return branchingTraceDecl;
   }

   /// Creates a struct declaration with the given JVP/VJP generic signature, for
   /// storing the linear map values and predecessor/successor basic block of the
   /// given original block.
   StructDecl *
   createLinearMapStruct(SILBasicBlock *originalBB, SILAutoDiffIndices indices,
                         CanGenericSignature genericSig) {
     assert(originalBB->getParent() == original);
     auto *original = originalBB->getParent();
     auto &astCtx = original->getASTContext();
     auto &file = getDeclarationFileUnit();
     std::string structName;
     switch (kind) {
     case swift::AutoDiffLinearMapKind::Differential:
       structName =
           "_AD__" + original->getName().str() +
           "_bb" + std::to_string(originalBB->getDebugID()) +
           "__DF__" + indices.mangle();
       break;
     case swift::AutoDiffLinearMapKind::Pullback:
       structName =
           "_AD__" + original->getName().str() +
           "_bb" + std::to_string(originalBB->getDebugID()) +
           "__PB__" + indices.mangle();
       break;
     }
     auto structId = astCtx.getIdentifier(structName);
     GenericParamList *genericParams = nullptr;
     if (genericSig)
       genericParams = cloneGenericParameters(astCtx, &file, genericSig);
     auto *linearMapStruct = new (astCtx) StructDecl(
         /*StructLoc*/ SourceLoc(), /*Name*/ structId, /*NameLoc*/ SourceLoc(),
         /*Inherited*/ {}, /*GenericParams*/ genericParams, /*DC*/ &file);
     // Note: must mark struct as implicit to satisfy assertion in
     // `Parser::parseDeclListDelayed`.
     linearMapStruct->setImplicit();
     if (genericSig)
       linearMapStruct->setGenericSignature(genericSig);
     computeAccessLevel(
         linearMapStruct, original->getEffectiveSymbolLinkage());
     linearMapStruct->computeType();
     assert(linearMapStruct->hasInterfaceType());
     file.addVisibleDecl(linearMapStruct);
     return linearMapStruct;
   }

   /// Add a linear map to the linear map struct.
   VarDecl *addLinearMapDecl(SILInstruction *inst, SILType linearMapType) {
     // IRGen requires decls to have AST types (not `SILFunctionType`), so we
     // convert the `SILFunctionType` of the linear map to a `FunctionType` with
     // the same parameters and results.
     auto silFnTy = linearMapType.castTo<SILFunctionType>();
     SmallVector<AnyFunctionType::Param, 8> params;
     for (auto &param : silFnTy->getParameters())
       params.push_back(AnyFunctionType::Param(param.getType()));
     AnyFunctionType *astFnTy;
     if (auto genSig = silFnTy->getGenericSignature())
       astFnTy = GenericFunctionType::get(
           genSig, params, silFnTy->getAllResultsType().getASTType());
     else
       astFnTy = FunctionType::get(
           params, silFnTy->getAllResultsType().getASTType());

     auto *origBB = inst->getParent();
     auto *linMapStruct = getLinearMapStruct(origBB);
     std::string linearMapName;
     switch (kind) {
     case AutoDiffLinearMapKind::Differential:
       linearMapName = "differential_" + llvm::itostr(linearMapValueMap.size());
       break;
     case AutoDiffLinearMapKind::Pullback:
       linearMapName = "pullback_" + llvm::itostr(linearMapValueMap.size());
       break;
     }
     auto *linearMapDecl = addVarDecl(linMapStruct, linearMapName, astFnTy);
     linearMapValueMap.insert({inst, linearMapDecl});
     return linearMapDecl;
   }

   /// Given an `apply` instruction, conditionally adds its linear map function
   /// to the linear map struct if it is active.
   void addLinearMapToStruct(ADContext &context, ApplyInst *ai,
                             const SILAutoDiffIndices &indices);

   /// Generate linear map struct and branching enum declarations for the given
   /// function. Linear map structs are populated with linear map fields and a
   /// branching enum field.
   void generateDifferentiationDataStructures(
       ADContext &context, const SILAutoDiffIndices &indices,
       SILFunction *derivative);

 public:
   bool shouldDifferentiateApplyInst(ApplyInst *ai);
   bool shouldDifferentiateInstruction(SILInstruction *inst);

   LinearMapInfo(const LinearMapInfo &) = delete;
   LinearMapInfo &operator=(const LinearMapInfo &) = delete;

   explicit LinearMapInfo(ADContext &context,
                          AutoDiffLinearMapKind kind,
                          SILFunction *original, SILFunction *derivative,
                          const SILAutoDiffIndices &indices,
                          const DifferentiableActivityInfo &activityInfo);

   /// Returns the linear map struct associated with the given original block.
   StructDecl *getLinearMapStruct(SILBasicBlock *origBB) const {
     return linearMapStructs.lookup(origBB);
   }

   /// Returns the lowered SIL type of the linear map struct associated with the
   /// given original block.
   SILType getLinearMapStructLoweredType(SILBasicBlock *origBB) const {
     auto *linMapStruct = getLinearMapStruct(origBB);
     auto linMapStructType =
         linMapStruct->getDeclaredInterfaceType()->getCanonicalType();
     return typeConverter.getLoweredType(linMapStructType,
                                         ResilienceExpansion::Minimal);
   }

   /// Returns the branching trace enum associated with the given original block.
   EnumDecl *getBranchingTraceDecl(SILBasicBlock *origBB) const {
     return branchingTraceDecls.lookup(origBB);
   }

   /// Returns the lowered SIL type of the branching trace enum associated with
   /// the given original block.
   SILType getBranchingTraceEnumLoweredType(SILBasicBlock *origBB) const {
     auto *traceDecl = getBranchingTraceDecl(origBB);
     auto traceDeclType =
         traceDecl->getDeclaredInterfaceType()->getCanonicalType();
     return typeConverter.getLoweredType(traceDeclType,
                                         ResilienceExpansion::Minimal);
   }

   /// Looks up the case of the successor block's branching trace enum that
   /// stands for control flow arriving from the given predecessor block.
   /// `origPredBB` must belong to the original function.
   EnumElementDecl *
   lookUpBranchingTraceEnumElement(SILBasicBlock *origPredBB,
                                   SILBasicBlock *origSuccBB) const {
     assert(origPredBB->getParent() == original);
     // Enum cases are keyed by the (predecessor, successor) edge.
     auto cfgEdge = std::make_pair(origPredBB, origSuccBB);
     return branchingTraceEnumCases.lookup(cfgEdge);
   }

   /// Returns the mapping from linear map structs to their branching trace enum
   /// fields. The map is returned by mutable reference, so callers can modify
   /// it in place.
   DenseMap<StructDecl *, VarDecl *> &getLinearMapStructEnumFields() {
     return linearMapStructEnumFields;
   }

   /// Looks up the branching trace enum field that belongs to the linear map
   /// struct of the given original block.
   VarDecl *lookUpLinearMapStructEnumField(SILBasicBlock *origBB) {
     // Resolve block -> linear map struct, then struct -> enum field.
     return linearMapStructEnumFields.lookup(getLinearMapStruct(origBB));
   }

   /// Returns the linear map field declaration recorded for the given original
   /// instruction (an `apply` or `struct_extract`). A declaration must have
   /// been recorded for `inst`; this is asserted.
   VarDecl *lookUpLinearMapDecl(SILInstruction *inst) {
     auto entry = linearMapValueMap.find(inst);
     assert(entry != linearMapValueMap.end() &&
            "No linear map declaration corresponding to the given instruction");
     return entry->second;
   }
 };

 /// Stores `apply` instruction information calculated by VJP generation.
 /// Plain aggregate with no invariants of its own.
 struct NestedApplyInfo {
   /// The differentiation indices that are used to differentiate this `apply`
   /// instruction.
   SILAutoDiffIndices indices;
   /// The original pullback type before reabstraction. `None` if the pullback
   /// type is not reabstracted.
   Optional<CanSILFunctionType> originalPullbackType;
 };

 /// Streams a textual description of `invoker` to `os` by delegating to
 /// `DifferentiationInvoker::print`, and returns `os` so calls can be chained.
 /// The invoker is taken by value.
 static inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
                                             DifferentiationInvoker invoker) {
   invoker.print(os);
   return os;
 }

 /// Prints a parenthesized description of this invoker to `os`, in the form
 /// `(differentiation_invoker <kind>=(...))`. Every `=(` group opened by a case
 /// is closed by that case; the trailing `)` closes the outer group.
 void DifferentiationInvoker::print(llvm::raw_ostream &os) const {
   os << "(differentiation_invoker ";
   switch (kind) {
   case Kind::DifferentiableFunctionInst:
     os << "differentiable_function_inst=(" << *getDifferentiableFunctionInst()
        << ")";
     break;
   case Kind::IndirectDifferentiation: {
     auto indDiff = getIndirectDifferentiation();
     os << "indirect_differentiation=(" << *std::get<0>(indDiff) << ')';
     // TODO: Enable printing parent invokers.
     // May require storing a `DifferentiableInvoker *` in the
     // `IndirectDifferentiation` case.
     /*
     SILInstruction *inst;
     SILDifferentiableAttr *attr;
     std::tie(inst, attr) = getIndirectDifferentiation();
     auto invokerLookup = invokers.find(attr); // No access to ADContext?
     assert(invokerLookup != invokers.end() && "Expected parent invoker");
     */
     break;
   }
   case Kind::SILDifferentiableAttribute: {
     auto diffAttr = getSILDifferentiableAttribute();
     os << "sil_differentiable_attribute=(attr=(";
     diffAttr->print(os);
     // The first `)` closes `attr=(`; the final `)` closes
     // `sil_differentiable_attribute=(`, keeping the output balanced like the
     // other cases.
     os << ") function=" << diffAttr->getOriginal()->getName() << ')';
     break;
   }
   }
   os << ')';
 }

 //===----------------------------------------------------------------------===//
 // ADContext - Per-module contextual information for the Differentiation pass.
 //===----------------------------------------------------------------------===//

 class ADContext {
 private:
   /// Reference to the main transform.
   SILModuleTransform &transform;

   /// The module on which differentiation is performed.
   SILModule &module;

   /// AST context.
   /// Initialized from `module` by its default member initializer, so it must
   /// stay declared after `module`.
   ASTContext &astCtx = module.getASTContext();

   /// Shared pass manager.
   SILPassManager &passManager;

   /// The worklist (stack) of `differentiable_function` instructions to be
   /// processed.
   SmallVector<DifferentiableFunctionInst *, 32> differentiableFunctionInsts;

   /// The set of `differentiable_function` instructions that have been
   /// processed. Used to avoid reprocessing invalidated instructions.
   SmallPtrSet<DifferentiableFunctionInst *, 32>
       processedDifferentiableFunctionInsts;

   /// Mapping from `[differentiable]` attributes to invokers.
   /// `SmallMapVector` is used for deterministic insertion order iteration.
   SmallMapVector<SILDifferentiableAttr *, DifferentiationInvoker, 32>
       invokers;

   /// Mapping from `differentiable_function` instructions to result indices.
   DenseMap<DifferentiableFunctionInst *, unsigned> resultIndices;

   /// Mapping from original `apply` instructions to their corresponding
   /// `NestedApplyInfo`s.
   DenseMap<ApplyInst *, NestedApplyInfo> nestedApplyInfo;

   /// List of generated functions (JVPs, VJPs, pullbacks, and thunks).
   /// Saved for deletion during cleanup.
   SmallVector<SILFunction *, 32> generatedFunctions;

   /// List of references to generated functions.
   /// Saved for deletion during cleanup.
   SmallVector<SILValue, 32> generatedFunctionReferences;

   /// The AdditiveArithmetic protocol in the standard library.
   /// Initialized through `astCtx`, so it must stay declared after `astCtx`.
   ProtocolDecl *additiveArithmeticProtocol =
       astCtx.getProtocol(KnownProtocolKind::AdditiveArithmetic);

   /// `AdditiveArithmetic.+` declaration.
   /// Null until first requested; `mutable` so that the const getter can
   /// cache it.
   mutable FuncDecl *cachedPlusFn = nullptr;
   /// `AdditiveArithmetic.+=` declaration.
   /// Null until first requested; `mutable` so that the const getter can
   /// cache it.
   mutable FuncDecl *cachedPlusEqualFn = nullptr;

 public:
   /// Construct an ADContext for the given module.
   explicit ADContext(SILModuleTransform &transform);

   //--------------------------------------------------------------------------//
   // General utilities
   //--------------------------------------------------------------------------//

   /// Returns the transform this context was constructed with.
   SILModuleTransform &getTransform() const { return transform; }
   /// Returns the module being differentiated.
   SILModule &getModule() const { return module; }
   /// Returns the AST context of the module.
   ASTContext &getASTContext() const { return module.getASTContext(); }
   /// Returns the shared pass manager.
   SILPassManager &getPassManager() const { return passManager; }
   /// Returns the module's type converter (`module.Types`).
   Lowering::TypeConverter &getTypeConverter() { return module.Types; }

   /// Returns the worklist of `differentiable_function` instructions by
   /// mutable reference.
   SmallVectorImpl<DifferentiableFunctionInst *> &
   getDifferentiableFunctionInsts() {
     return differentiableFunctionInsts;
   }

   /// Returns the set of already processed `differentiable_function`
   /// instructions by mutable reference.
   SmallPtrSetImpl<DifferentiableFunctionInst *> &
   getProcessedDifferentiableFunctionInsts() {
     return processedDifferentiableFunctionInsts;
   }

   /// Returns the mapping from `[differentiable]` attributes to invokers by
   /// mutable reference.
   llvm::SmallMapVector<SILDifferentiableAttr *, DifferentiationInvoker, 32> &
   getInvokers() {
     return invokers;
   }

   /// Returns the mapping from `differentiable_function` instructions to
   /// result indices by mutable reference.
   DenseMap<DifferentiableFunctionInst *, unsigned> &getResultIndices() {
     return resultIndices;
   }

   /// Returns the mapping from original `apply` instructions to their
   /// `NestedApplyInfo` by mutable reference.
   DenseMap<ApplyInst *, NestedApplyInfo> &getNestedApplyInfo() {
     return nestedApplyInfo;
   }

   /// Returns the list of generated functions by mutable reference.
   SmallVector<SILFunction *, 32> &getGeneratedFunctions() {
     return generatedFunctions;
   }

   /// Returns the list of references to generated functions by mutable
   /// reference.
   SmallVector<SILValue, 32> &getGeneratedFunctionReferences() {
     return generatedFunctionReferences;
   }

   /// Returns the `AdditiveArithmetic` protocol declaration looked up when this
   /// context was constructed.
   ProtocolDecl *getAdditiveArithmeticProtocol() const {
     return additiveArithmeticProtocol;
   }

   /// Returns the `AdditiveArithmetic.+` declaration. The declaration is looked
   /// up on first use and cached afterwards.
   FuncDecl *getPlusDecl() const {
     // Fast path: already resolved.
     if (cachedPlusFn)
       return cachedPlusFn;
     auto plusName = astCtx.getIdentifier("+");
     cachedPlusFn =
         findOperatorDeclInProtocol(plusName, additiveArithmeticProtocol);
     assert(cachedPlusFn && "AdditiveArithmetic.+ not found");
     return cachedPlusFn;
   }

   /// Returns the `AdditiveArithmetic.+=` declaration. The declaration is
   /// looked up on first use and cached afterwards.
   FuncDecl *getPlusEqualDecl() const {
     // Fast path: already resolved.
     if (cachedPlusEqualFn)
       return cachedPlusEqualFn;
     auto plusEqualName = astCtx.getIdentifier("+=");
     cachedPlusEqualFn =
         findOperatorDeclInProtocol(plusEqualName, additiveArithmeticProtocol);
     assert(cachedPlusEqualFn && "AdditiveArithmetic.+= not found");
     return cachedPlusEqualFn;
   }

   /// Reverts what this context added to the module, in three steps:
   /// 1. Removes every `[differentiable]` attribute that has an entry in
   ///    `invokers` from its original function.
   /// 2. Erases the `function_ref` instructions behind all recorded references
   ///    to generated functions, replacing their uses with undef.
   /// 3. Deletes all generated functions, notifying the transform first.
   void cleanUp() {
     // Iterate by reference: copying each entry would also copy its
     // `DifferentiationInvoker`, which is not needed here.
     for (const auto &invokerPair : invokers) {
       auto *attr = invokerPair.first;
       auto *original = attr->getOriginal();
       LLVM_DEBUG(getADDebugStream()
                  << "Removing [differentiable] attribute for "
                  << original->getName() << '\n');
       original->removeDifferentiableAttr(attr);
     }
     // Delete all references to generated functions.
     for (auto fnRef : generatedFunctionReferences) {
       if (auto *fnRefInst =
               peerThroughFunctionConversions<FunctionRefInst>(fnRef)) {
         fnRefInst->replaceAllUsesWithUndef();
         fnRefInst->eraseFromParent();
       }
     }
     // Delete all generated functions.
     for (auto *generatedFunction : generatedFunctions) {
       LLVM_DEBUG(getADDebugStream()
                  << "Deleting generated function "
                  << generatedFunction->getName() << '\n');
       generatedFunction->dropAllReferences();
       transform.notifyWillDeleteFunction(generatedFunction);
       module.eraseFunction(generatedFunction);
     }
   }

   //--------------------------------------------------------------------------//
   // `[differentiable]` attribute lookup and registration
   //--------------------------------------------------------------------------//

   /// Returns the first `[differentiable]` attribute on `original` whose
   /// indices compare equal to `indices`, or nullptr when there is none.
   SILDifferentiableAttr *lookUpDifferentiableAttr(
       SILFunction *original, const SILAutoDiffIndices &indices) const {
     for (auto *candidate : original->getDifferentiableAttrs()) {
       if (candidate->getIndices() == indices) {
         return candidate;
       }
     }
     return nullptr;
   }

   /// Finds the `[differentiable]` attribute on `original` that best fits
   /// `indices`. An attribute with exactly these indices is returned if one
   /// exists. Otherwise, the attributes with the same source index whose
   /// parameter indices are a superset of `indices.parameters` are considered,
   /// and the one with the fewest parameter indices is returned. Returns
   /// nullptr if no attribute qualifies.
   SILDifferentiableAttr *lookUpMinimalDifferentiableAttr(
       SILFunction *original, const SILAutoDiffIndices &indices) const {
     // An exact match always wins.
     if (auto *exactAttr = lookUpDifferentiableAttr(original, indices))
       return exactAttr;
     auto *wantedIndexSet = indices.parameters;
     SILDifferentiableAttr *bestAttr = nullptr;
     // Index set of the best candidate so far; starts out as the default index
     // subset, which the loop treats as "no candidate yet" while it is empty.
     auto *bestIndexSet = IndexSubset::getDefault(
         getASTContext(),
         original->getLoweredFunctionType()->getNumParameters(), false);
     for (auto *candidate : original->getDifferentiableAttrs()) {
       // Only attributes for the same source index are eligible.
       if (candidate->getIndices().source != indices.source)
         continue;
       auto *candidateIndexSet = candidate->getIndices().parameters;
       // NOTE(TF-642): the candidate may come from an un-partial-applied
       // function and have larger capacity than the desired indices, so the
       // desired indices are extended to the candidate's capacity before
       // comparing. We expect this logic to go away when `partial_apply`
       // supports `@differentiable` callees.
       auto *extendedWanted = wantedIndexSet->extendingCapacity(
           getASTContext(), candidateIndexSet->getCapacity());
       if (!candidateIndexSet->isSupersetOf(extendedWanted))
         continue;
       // Keep the candidate only if it has fewer indices than the current best.
       bool isBetter =
           bestIndexSet->isEmpty() ||
           candidateIndexSet->getNumIndices() < bestIndexSet->getNumIndices();
       if (!isBetter)
         continue;
       bestAttr = candidate;
       bestIndexSet = candidateIndexSet;
     }
     return bestAttr;
   }

   /// Finds the `@differentiable` attribute on the declaration of
   /// `originalDeclRef` whose lowered parameter indices are a superset of
   /// `indices.parameters` and that has the fewest parameter indices among
   /// such attributes. Returns the attribute together with its lowered
   /// parameter indices. If no attribute qualifies, the attribute is nullptr
   /// and the index subset is the initial default subset sized by the number
   /// of parameters of `originalFnType`.
   std::pair<const DifferentiableAttr *, IndexSubset *>
   lookUpMinimalASTDifferentiableAttrAndIndexSubset(
       SILDeclRef originalDeclRef, CanSILFunctionType originalFnType,
       const SILAutoDiffIndices &indices) {
     auto *originalDecl = originalDeclRef.getDecl();
     auto *wantedIndexSet = indices.parameters;
     const DifferentiableAttr *bestAttr = nullptr;
     // Index set of the best candidate so far; the loop treats it as "no
     // candidate yet" while it is empty.
     auto *bestIndexSet = IndexSubset::getDefault(
         getASTContext(), originalFnType->getNumParameters(), false);
     for (auto *candidate :
          originalDecl->getAttrs().getAttributes<DifferentiableAttr>()) {
       // Lower the AST parameter indices so that they can be compared with
       // the requested indices.
       auto *astParamIndices = candidate->getParameterIndices();
       auto *candidateIndexSet = autodiff::getLoweredParameterIndices(
           astParamIndices,
           originalDecl->getInterfaceType()->castTo<AnyFunctionType>());
       // NOTE(TF-642): the candidate may come from an un-partial-applied
       // function and have larger capacity than the desired indices, so the
       // desired indices are extended to the candidate's capacity before
       // comparing. We expect this logic to go away when `partial_apply`
       // supports `@differentiable` callees.
       auto *extendedWanted = wantedIndexSet->extendingCapacity(
           getASTContext(), candidateIndexSet->getCapacity());
       if (!candidateIndexSet->isSupersetOf(extendedWanted))
         continue;
       // Keep the candidate only if it has fewer indices than the current best.
       bool isBetter =
           bestIndexSet->isEmpty() ||
           candidateIndexSet->getNumIndices() < bestIndexSet->getNumIndices();
       if (!isBetter)
         continue;
       bestAttr = candidate;
       bestIndexSet = candidateIndexSet;
     }
     return {bestAttr, bestIndexSet};
   }

   /// Creates a new `[differentiable]` attribute with the given indices and
   /// empty JVP/VJP names, adds it to `original`, and returns it. The
   /// attribute's generic signature is derived from
   /// `derivativeGenericSignature` via
   /// `getConstrainedDerivativeGenericSignature`. `original` must not already
   /// have an attribute with exactly these indices; this is asserted.
   SILDifferentiableAttr *createDifferentiableAttr(
       SILFunction *original, const SILAutoDiffIndices &indices,
       GenericSignature derivativeGenericSignature) const {
     assert(!lookUpDifferentiableAttr(original, indices));
     auto constrainedGenSig = getConstrainedDerivativeGenericSignature(
         original->getLoweredFunctionType(), indices.parameters,
         derivativeGenericSignature);
     auto *newAttr = SILDifferentiableAttr::create(
         getModule(), indices, /*jvpName*/ StringRef(), /*vjpName*/ StringRef(),
         constrainedGenSig);
     original->addDifferentiableAttr(newAttr);
     return newAttr;
   }

   /// Returns the `[differentiable]` attribute on `original` with exactly the
   /// given indices, creating and attaching one if none exists yet. Creation
   /// requires `original` to be a definition; this is asserted.
   SILDifferentiableAttr *getOrCreateDifferentiableAttr(
       SILFunction *original, const SILAutoDiffIndices &indices,
       GenericSignature derivativeGenericSignature) {
     auto *existingAttr = lookUpDifferentiableAttr(original, indices);
     if (existingAttr)
       return existingAttr;
     // No attribute yet: create one.
     assert(original->isDefinition());
     return createDifferentiableAttr(original, indices,
                                     derivativeGenericSignature);
   }

   /// Creates a `differentiable_function` instruction with the given builder
   /// and arguments, and makes sure the new instruction is not in the processed
   /// set. It can already be there when it has the same pointer value as a
   /// previously processed and deleted instruction.
   DifferentiableFunctionInst *createDifferentiableFunction(
       SILBuilder &builder, SILLocation loc,
       IndexSubset *parameterIndices, SILValue original,
       Optional<std::pair<SILValue, SILValue>> derivativeFunctions = None) {
     auto *newInst = builder.createDifferentiableFunction(
         loc, parameterIndices, original, derivativeFunctions);
     // Drop any stale entry left behind for this address.
     processedDifferentiableFunctionInsts.erase(newInst);
     return newInst;
   }

 private:
   /// Promotes the given `differentiable_function` instruction to a valid
   /// `@differentiable` function-typed value.
   SILValue promoteToDifferentiableFunction(
       DifferentiableFunctionInst *inst, SILBuilder &builder, SILLocation loc,
       DifferentiationInvoker invoker);

 public:
   /// Process the given `[differentiable]` attribute, filling in JVP/VJPs if
   /// missing.
   bool processDifferentiableAttribute(
       SILFunction *original, SILDifferentiableAttr *attr,
       DifferentiationInvoker invoker);

   /// Process the given `differentiable_function` instruction, filling in
   /// missing derivative functions if necessary.
   bool processDifferentiableFunctionInst(DifferentiableFunctionInst *dfi);

   /// Fold `differentiable_function_extract` users of the given
   /// `differentiable_function` instruction, directly replacing them with
   /// `differentiable_function` instruction operands. If the
   /// `differentiable_function` instruction has no remaining uses, delete the
   /// instruction itself after folding.
   ///
   /// Folding can be disabled by the
   /// `SkipFoldingDifferentiableFunctionExtraction` flag for SIL testing
   /// purposes.
   void foldDifferentiableFunctionExtraction(DifferentiableFunctionInst *source);

   /// Get or create a derivative function parameter index subset thunk from
   /// `actualIndices` to `desiredIndices` for the given associated function
   /// value and original function operand. Returns a pair of the parameter
   /// index subset thunk and its interface substitution map (used to partially
   /// apply the thunk).
   /// Calls `getOrCreateSubsetParametersThunkForLinearMap` to thunk the linear
   /// map returned by the derivative function.
   std::pair<SILFunction *, SubstitutionMap>
   getOrCreateSubsetParametersThunkForDerivativeFunction(
       SILValue origFnOperand, SILValue derivativeFn,
       AutoDiffDerivativeFunctionKind kind, SILAutoDiffIndices desiredIndices,
       SILAutoDiffIndices actualIndices);

   /// Get or create a derivative function parameter index subset thunk from
   /// `actualIndices` to `desiredIndices` for the given associated function
   /// value and original function operand. Returns a pair of the parameter
   /// index subset thunk and its interface substitution map (used to partially
   /// apply the thunk).
   std::pair<SILFunction *, SubstitutionMap>
   getOrCreateSubsetParametersThunkForLinearMap(
       SILFunction *assocFn, CanSILFunctionType linearMapType,
       CanSILFunctionType targetType, AutoDiffDerivativeFunctionKind kind,
       SILAutoDiffIndices desiredIndices, SILAutoDiffIndices actualIndices);

 public:
   /// Declare an external reference to a derivative function of `original`,
   /// given a `[differentiable]` attribute of `original` and the associated
   /// function kind.
   SILFunction *
   declareExternalDerivativeFunction(SILFunction *original,
                                     SILDifferentiableAttr *attr, StringRef name,
                                     AutoDiffDerivativeFunctionKind kind);

   /// Emits the diagnostic `diag` at `loc` through the AST context's diagnostic
   /// engine, perfectly forwarding `args` as the diagnostic arguments. Returns
   /// the in-flight diagnostic so that callers can further decorate it.
   template <typename ...T, typename ...U>
   InFlightDiagnostic diagnose(SourceLoc loc, Diag<T...> diag,
                               U &&...args) const {
     return getASTContext().Diags.diagnose(loc, diag, std::forward<U>(args)...);
   }

   /// Given an instruction and a differentiation task associated with the
   /// parent function, emits a "not differentiable" error based on the task. If
   /// the task is indirect, emits notes all the way up to the outermost task,
   /// and emits an error at the outer task. Otherwise, emits an error directly.
   template<typename ...T, typename ...U>
   InFlightDiagnostic emitNondifferentiabilityError(
       SILInstruction *inst, DifferentiationInvoker invoker,
       Diag<T...> diag, U &&...args);

   /// Given a value and a differentiation task associated with the parent
   /// function, emits a "not differentiable" error based on the task. If the
   /// task is indirect, emits notes all the way up to the outermost task, and
   /// emits an error at the outer task. Otherwise, emits an error directly.
   template<typename ...T, typename ...U>
   InFlightDiagnostic emitNondifferentiabilityError(
       SILValue value, DifferentiationInvoker invoker,
       Diag<T...> diag, U &&...args);

   /// Emit a "not differentiable" error based on the given differentiation task
   /// and diagnostic.
   template<typename ...T, typename ...U>
   InFlightDiagnostic emitNondifferentiabilityError(
       SourceLoc loc, DifferentiationInvoker invoker,
       Diag<T...> diag, U &&...args);
 };
 } // end anonymous namespace

 /// Constructs the context from the given transform, taking the module and the
 /// pass manager from it. The remaining members use their default member
 /// initializers.
 ADContext::ADContext(SILModuleTransform &transform)
     : transform(transform), module(*transform.getModule()),
       passManager(*transform.getPassManager()) {}

 /// Value overload: resolves a source location for `value` and forwards to the
 /// `SourceLoc` overload with the same invoker, diagnostic, and arguments.
 template<typename ...T, typename ...U>
 InFlightDiagnostic
 ADContext::emitNondifferentiabilityError(SILValue value,
                                          DifferentiationInvoker invoker,
                                          Diag<T...> diag, U &&...args) {
   LLVM_DEBUG({
     getADDebugStream() << "Diagnosing non-differentiability.\n";
     getADDebugStream() << "For value:\n" << value;
     getADDebugStream() << "With invoker:\n" << invoker << '\n';
   });
   SourceLoc diagLoc = value.getLoc().getSourceLoc();
   // If the value does not have a valid location, fall back to the location of
   // its function. Improves diagnostics in some cases.
   if (diagLoc.isInvalid()) {
     auto *parentFn = value->getFunction();
     diagLoc = parentFn->getLocation().getSourceLoc();
   }
   return emitNondifferentiabilityError(diagLoc, invoker, diag,
                                        std::forward<U>(args)...);
 }

 /// Instruction overload: resolves a source location for `inst` and forwards
 /// to the `SourceLoc` overload with the same invoker, diagnostic, and
 /// arguments.
 template<typename ...T, typename ...U>
 InFlightDiagnostic
 ADContext::emitNondifferentiabilityError(SILInstruction *inst,
                                          DifferentiationInvoker invoker,
                                          Diag<T...> diag, U &&...args) {
   LLVM_DEBUG({
     getADDebugStream() << "Diagnosing non-differentiability.\n";
     getADDebugStream() << "For instruction:\n" << *inst;
     getADDebugStream() << "With invoker:\n" << invoker << '\n';
   });
   SourceLoc diagLoc = inst->getLoc().getSourceLoc();
   // If the instruction does not have a valid location, fall back to the
   // location of its function. Improves diagnostics for `ref_element_addr`
   // generated in synthesized stored property getters.
   if (diagLoc.isInvalid()) {
     auto *parentFn = inst->getFunction();
     diagLoc = parentFn->getLocation().getSourceLoc();
   }
   return emitNondifferentiabilityError(diagLoc, invoker, diag,
                                        std::forward<U>(args)...);
 }

 /// Location overload: emits the "not differentiable" diagnostics appropriate
 /// for the kind of `invoker` and returns the last diagnostic emitted at `loc`.
 template<typename ...T, typename ...U>
 InFlightDiagnostic
 ADContext::emitNondifferentiabilityError(SourceLoc loc,
                                          DifferentiationInvoker invoker,
                                          Diag<T...> diag, U &&...args) {
   switch (invoker.getKind()) {
   // `differentiable_function` instruction: if it comes from a differential
   // operator, the error goes on the operator expression; otherwise it goes on
   // the non-differentiable operation itself. In both cases, `diag` is then
   // emitted at the non-differentiable operation.
   case DifferentiationInvoker::Kind::DifferentiableFunctionInst: {
     auto *dfi = invoker.getDifferentiableFunctionInst();
     if (auto *diffOpExpr = findDifferentialOperator(dfi))
       diagnose(diffOpExpr->getLoc(),
                diag::autodiff_function_not_differentiable_error)
           .highlight(diffOpExpr->getSubExpr()->getSourceRange());
     else
       diagnose(loc, diag::autodiff_expression_not_differentiable_error);
     return diagnose(loc, diag, std::forward<U>(args)...);
   }

   // `[differentiable]` attribute: try to find an AST declaration carrying a
   // `@differentiable` attribute. If found, the error goes on that attribute,
   // followed by a note on the function definition; otherwise the error goes
   // on the SIL function. `diag` is then emitted at the non-differentiable
   // operation.
   case DifferentiationInvoker::Kind::SILDifferentiableAttribute: {
     auto *original = invoker.getSILDifferentiableAttribute()->getOriginal();
     auto originalLoc = [&] { return original->getLocation().getSourceLoc(); };
     // Each step yields null if the previous one did.
     auto *declContext = original->getDeclContext();
     auto *fnDecl = declContext
                        ? declContext->getInnermostDeclarationDeclContext()
                        : nullptr;
     auto *diffAttr =
         fnDecl ? fnDecl->getAttrs().getAttribute<DifferentiableAttr>()
                : nullptr;
     if (diffAttr) {
       diagnose(diffAttr->getLocation(),
                diag::autodiff_function_not_differentiable_error)
           .highlight(diffAttr->getRangeWithAt());
       diagnose(originalLoc(),
                diag::autodiff_when_differentiating_function_definition);
     } else {
       // Fallback if we cannot find the expected attribute.
       diagnose(originalLoc(),
                diag::autodiff_function_not_differentiable_error);
     }
     return diagnose(loc, diag, std::forward<U>(args)...);
   }

   // Indirect differentiation: first report the invoking instruction through
   // its parent invoker with a "not differentiable" note, which recurses up to
   // the source invoker. Then emit a "when differentiating this" note at
   // `loc`.
   case DifferentiationInvoker::Kind::IndirectDifferentiation: {
     SILInstruction *parentInst;
     SILDifferentiableAttr *parentAttr;
     std::tie(parentInst, parentAttr) = invoker.getIndirectDifferentiation();
     auto parentInvoker = invokers.find(parentAttr);
     assert(parentInvoker != invokers.end() && "Expected parent invoker");
     emitNondifferentiabilityError(
         parentInst, parentInvoker->second,
         diag::autodiff_expression_not_differentiable_note);
     return diagnose(loc, diag::autodiff_when_differentiating_function_call);
   }
   }
 }

 //===----------------------------------------------------------------------===//
 // Activity Analysis
 //===----------------------------------------------------------------------===//

 namespace {
 class DifferentiableActivityCollection;

 /// In many real situations, the end-users of AD need only the derivatives of
 /// some selected outputs of `P` with respect to some selected inputs of `P`.
 /// Whatever the differentiation mode (tangent, reverse,...), these restrictions
 /// allow the AD tool to produce a much more efficient differentiated program.
 /// Essentially, fixing some inputs and neglecting some outputs allows AD to
 /// just forget about several intermediate differentiated variables.
 ///
 /// Activity analysis is the specific analysis that detects these situations,
 /// therefore allowing for a better differentiated code. Activity analysis is
 /// present in all transformation-based AD tools.
 ///
 /// To begin with, the end-user specifies that only some output variables (the
 /// “dependent”) must be differentiated with respect to only some input
 /// variables (the “independent”). We say that variable `y` depends on `x` when
 /// the derivative of `y` with respect to `x` is not trivially null. We say that
 /// a variable is “varied” if it depends on at least one independent. Conversely
 /// we say that a variable is “useful” if at least one dependent depends on it.
 /// Finally, we say that a variable is “active” if it is at the same time varied
 /// and useful. In the special case of the tangent mode, it is easy to check
 /// that when variable `v` is not varied at some place in the program, then its
 /// derivative `v̇` at this place is certainly null. Conversely when variable `v`
 /// is not useful, then whatever the value of `v̇`, this value does not matter
 /// for the final result. Symmetric reasoning applies for the reverse mode of
 /// AD: observing that differentiated variables go upstream, we see that a
 /// useless variable has a null derivative, in other words the partial
 /// derivative of the output with respect to this variable is null. Conversely
 /// when variable `v` is not varied, then whatever the value of `v̄`, this value
 /// does not matter for the final result.
 ///
 /// Reference:
 /// Laurent Hascoët. Automatic Differentiation by Program Transformation. 2007.
 class DifferentiableActivityAnalysis
     : public FunctionAnalysisBase<DifferentiableActivityCollection> {
   // Dominance analyses used by this analysis. Both start out null.
   // NOTE(review): presumably assigned in `initialize` — confirm.
   DominanceAnalysis *dominanceAnalysis = nullptr;
   PostDominanceAnalysis *postDominanceAnalysis = nullptr;

 public:
   /// Registers this analysis under the `DifferentiableActivity` kind.
   explicit DifferentiableActivityAnalysis()
       : FunctionAnalysisBase(SILAnalysisKind::DifferentiableActivity) {}

   /// Returns true if `s` is an analysis of the `DifferentiableActivity` kind.
   static bool classof(const SILAnalysis *s) {
     return s->getKind() == SILAnalysisKind::DifferentiableActivity;
   }

   /// Returns true if `k` shares any bit with `InvalidationKind::Everything`.
   bool shouldInvalidate(SILAnalysis::InvalidationKind k) override {
     return k & InvalidationKind::Everything;
   }

   /// Creates the per-function activity collection for `f`.
   std::unique_ptr<DifferentiableActivityCollection>
   newFunctionAnalysis(SILFunction *f) override;

   /// Initialization hook taking the pass manager.
   void initialize(SILPassManager *pm) override;
 };
 } // end anonymous namespace

 namespace {
 /// Represents the differentiation activity associated with a SIL value.
 /// Each flag is a distinct bit so that flags can be combined in an
 /// `OptionSet`. Note that bit 0 is not used.
 enum class ActivityFlags : unsigned {
   /// The value depends on a function parameter.
   Varied = 1 << 1,
   /// The value contributes to a result.
   Useful = 1 << 2,
   /// The value is both varied and useful.
   Active = Varied | Useful,
 };

 /// A set of `ActivityFlags`.
 using Activity = OptionSet<ActivityFlags>;

 /// Result of activity analysis on a function. Accepts queries for whether a
 /// value is "varied", "useful" or "active" against certain differentiation
 /// indices.
 class DifferentiableActivityInfo {
 private:
   /// The collection this activity info belongs to.
   DifferentiableActivityCollection &parent;

   /// The derivative generic signature.
   GenericSignature derivativeGenericSignature;

   /// Input values, i.e. parameters (both direct and indirect).
   SmallVector<SILValue, 4> inputValues;
   /// Output values, i.e. individual values (not the final tuple) being returned
   /// by the `return` instruction.
   SmallVector<SILValue, 4> outputValues;

   /// The set of useful variables, indexed by the corresponding dependent value
   /// (output) index.
   SmallVector<SmallDenseSet<SILValue>, 4> usefulValueSets;
   /// The set of varied variables, indexed by the corresponding independent
   /// value (input) index.
   SmallVector<SmallDenseSet<SILValue>, 4> variedValueSets;

   /// The original function.
   SILFunction &getFunction();

   /// The conformance lookup function.
   LookupConformanceFn getLookupConformanceFunction() {
     // Look up in derivative generic signature, if defined.
     if (derivativeGenericSignature)
       return LookUpConformanceInSignature(
           derivativeGenericSignature.getPointer());
     // Otherwise, look up in the module.
     return LookUpConformanceInModule(
         getFunction().getModule().getSwiftModule());
   }

   /// Perform analysis and populate sets.
   void analyze(DominanceInfo *di, PostDominanceInfo *pdi);

   void setVaried(SILValue value, unsigned independentVariableIndex);
   void setVariedAcrossArrayInitialization(SILValue value,
                                           unsigned independentVariableIndex);
   void setUseful(SILValue value, unsigned dependentVariableIndex);
   void setUsefulAcrossArrayInitialization(SILValue value,
                                           unsigned dependentVariableIndex);
   /// Marks the given value as "varied" and recursively propagates "varied"
   /// inwards (to operands) through projections. Skips any `@noDerivative`
   /// struct field projections.
   void propagateVariedInwardsThroughProjections(
       SILValue value, unsigned independentVariableIndex);
   void propagateUsefulThroughBuffer(SILValue value,
                                     unsigned dependentVariableIndex);

 public:
   explicit DifferentiableActivityInfo(
       DifferentiableActivityCollection &parent,
       GenericSignature derivativeGenericSignature);

   // Queries against a single independent (input) or dependent (output) index.
   bool isVaried(SILValue value, unsigned independentVariableIndex) const;
   bool isUseful(SILValue value, unsigned dependentVariableIndex) const;
   // Queries against a set of parameter indices or full differentiation
   // indices.
   bool isVaried(SILValue value, IndexSubset *parameterIndices) const;
   bool isActive(SILValue value, const SILAutoDiffIndices &indices) const;

   Activity getActivity(SILValue value,
                        const SILAutoDiffIndices &indices) const;
   Activity getActivity(SILInstruction *inst,
                        const SILAutoDiffIndices &indices) const;
 };

 /// Returns true if `argument` is one of the parameters of its parent function
 /// selected by `indices`. The indices refer to the function's arguments
 /// without indirect results. A null `argument` is never a differentiation
 /// parameter.
 static bool isDifferentiationParameter(SILArgument *argument,
                                        IndexSubset *indices) {
   if (argument == nullptr)
     return false;
   auto formalParams =
       argument->getFunction()->getArgumentsWithoutIndirectResults();
   for (unsigned paramIndex : indices->getIndices()) {
     if (formalParams[paramIndex] == argument) {
       return true;
     }
   }
   return false;
 }

 /// For an `apply` instruction with active results, compute:
 /// - `results`: every result of the `apply` (direct and indirect), in the
 ///   order the results appear in the callee's function type.
 /// - `paramIndices`: callee parameter indices whose argument is varied with
 ///   respect to the parent's differentiation parameters, or is itself one of
 ///   the parent's differentiation parameters.
 /// - `resultIndices`: callee result indices whose value is useful for the
 ///   parent's differentiation result.
 static void collectMinimalIndicesForFunctionCall(
     ApplyInst *ai, const SILAutoDiffIndices &parentIndices,
     const DifferentiableActivityInfo &activityInfo,
     SmallVectorImpl<SILValue> &results, SmallVectorImpl<unsigned> &paramIndices,
     SmallVectorImpl<unsigned> &resultIndices) {
   auto calleeFnTy = ai->getSubstCalleeType();
   auto calleeConvs = ai->getSubstCalleeConv();

   // Walk the parameter arguments in type order and record the index of each
   // one that must be differentiated with respect to.
   unsigned paramIdx = 0;
   for (auto applyArg : ai->getArgumentsWithoutIndirectResults()) {
     const bool recordParam =
         activityInfo.isVaried(applyArg, parentIndices.parameters) ||
         isDifferentiationParameter(dyn_cast<SILArgument>(applyArg),
                                    parentIndices.parameters);
     if (recordParam)
       paramIndices.push_back(paramIdx);
     ++paramIdx;
   }

   // Gather direct results up front so that they can be interleaved with the
   // indirect results below.
   SmallVector<SILValue, 8> directResults;
   forEachApplyDirectResult(ai, [&](SILValue directResult) {
     directResults.push_back(directResult);
   });
   auto indirectResults = ai->getIndirectSILResults();

   // Visit the callee's formal results in type order. Each formal result is
   // taken from either the direct or the indirect result list, and its index
   // is recorded when the value is useful.
   results.reserve(calleeFnTy->getNumResults());
   unsigned nextDirect = 0;
   unsigned nextIndirect = calleeConvs.getSILArgIndexOfFirstIndirectResult();
   for (auto &resAndIdx : enumerate(calleeConvs.getResults())) {
     unsigned resultIdx = resAndIdx.index();
     if (!resAndIdx.value().isFormalDirect()) {
       results.push_back(indirectResults[nextIndirect]);
       if (activityInfo.isUseful(indirectResults[nextIndirect],
                                 parentIndices.source))
         resultIndices.push_back(resultIdx);
       ++nextIndirect;
       continue;
     }
     SILValue dirRes = directResults[nextDirect];
     results.push_back(dirRes);
     // A null direct result is recorded in `results` but never counts as
     // useful.
     if (dirRes && activityInfo.isUseful(dirRes, parentIndices.source))
       resultIndices.push_back(resultIdx);
     ++nextDirect;
   }

   // Make sure the function call has active results.
   assert(results.size() == calleeFnTy->getNumResults());
   assert(llvm::any_of(results, [&](SILValue result) {
     return activityInfo.isActive(result, parentIndices);
   }));
 }

 /// Constructs linear map information for differentiating `original` into
 /// `derivative` with respect to `indices`, using `activityInfo` to decide
 /// which instructions are active. The per-block differentiation data
 /// structures are generated immediately as part of construction.
 LinearMapInfo::LinearMapInfo(ADContext &context,
                              AutoDiffLinearMapKind kind,
                              SILFunction *original, SILFunction *derivative,
                              const SILAutoDiffIndices &indices,
                              const DifferentiableActivityInfo &activityInfo)
     : kind(kind), original(original), derivative(derivative),
       activityInfo(activityInfo), indices(indices),
       typeConverter(context.getTypeConverter()) {
   // All members are initialized at this point, so the generator may freely
   // read `kind`, `original`, and `activityInfo`.
   generateDifferentiationDataStructures(context, indices, derivative);
 }

 /// Decides whether an `apply` instruction should be differentiated, given the
 /// differentiation indices of its parent function. The checks are made in
 /// this order, and the first one that holds yields `true`:
 /// 1. The `apply` has an active `inout` argument.
 /// 2. The `apply` calls the array literal initialization intrinsic
 ///    ("array.uninitialized_intrinsic") and has an active result.
 /// 3. The `apply` has both an active result (direct or indirect) and an
 ///    active argument.
 bool LinearMapInfo::shouldDifferentiateApplyInst(ApplyInst *ai) {
   auto isActiveValue = [&](SILValue value) {
     return activityInfo.isActive(value, indices);
   };

   // Rule 1: any active `inout` argument forces differentiation.
   auto calleeParams = ai->getSubstCalleeConv().getParameters();
   auto args = ai->getArgumentsWithoutIndirectResults();
   for (auto paramIdx : swift::indices(calleeParams)) {
     if (!calleeParams[paramIdx].isIndirectInOut())
       continue;
     if (isActiveValue(args[paramIdx]))
       return true;
   }

   // Determine whether any direct or indirect result is active.
   bool anyActiveDirectResult = false;
   forEachApplyDirectResult(ai, [&](SILValue directResult) {
     anyActiveDirectResult |= isActiveValue(directResult);
   });
   const bool anyActiveIndirectResult =
       llvm::any_of(ai->getIndirectSILResults(), isActiveValue);
   const bool anyActiveResult =
       anyActiveDirectResult || anyActiveIndirectResult;

   // Rule 2.
   // TODO: Pattern match to make sure there is at least one `store` to the
   // array's active buffer.
   if (isArrayLiteralIntrinsic(ai) && anyActiveResult)
     return true;

   // Rule 3: both an active result and an active argument are required.
   const bool anyActiveArgument = llvm::any_of(args, isActiveValue);
   return anyActiveResult && anyActiveArgument;
 }

 /// Decides whether an instruction should be differentiated, given the
 /// differentiation indices of its parent function. The checks are made in
 /// this order:
 /// 1. An `apply` is differentiated iff `shouldDifferentiateApplyInst` says so.
 /// 2. Any instruction with both an active operand and an active result is
 ///    differentiated.
 /// 3. A store-like instruction (`store`, `store_borrow`, `copy_addr`,
 ///    `unconditional_checked_cast_addr`) is differentiated iff its
 ///    destination is active.
 /// 4. An allocation instruction with an active result is differentiated.
 /// 5. An instruction that performs reference counting, lifetime ending,
 ///    access ending, deallocation, or destroying on an active operand is
 ///    differentiated.
 /// 6. A `copy_value` of an active operand is differentiated.
 bool LinearMapInfo::shouldDifferentiateInstruction(SILInstruction *inst) {
   auto isActiveValue = [&](SILValue value) {
     return activityInfo.isActive(value, indices);
   };

   // `apply` instructions have their own dedicated rules.
   if (auto *ai = dyn_cast<ApplyInst>(inst))
     return shouldDifferentiateApplyInst(ai);

   // Anything with an active result and an active operand should be
   // differentiated.
   const bool anyActiveOperand = llvm::any_of(
       inst->getAllOperands(),
       [&](Operand &op) { return isActiveValue(op.get()); });
   const bool anyActiveResult =
       llvm::any_of(inst->getResults(), isActiveValue);
   if (anyActiveOperand && anyActiveResult)
     return true;

   // A `store`-like instruction does not have an SSA result, but has two
   // operands that represent the source and the destination. The destination
   // plays the role of the output, and its activity alone decides the answer.
   if (auto *si = dyn_cast<StoreInst>(inst))
     return isActiveValue(si->getDest());
   if (auto *sbi = dyn_cast<StoreBorrowInst>(inst))
     return isActiveValue(sbi->getDest());
   if (auto *cai = dyn_cast<CopyAddrInst>(inst))
     return isActiveValue(cai->getDest());
   if (auto *uccai = dyn_cast<UnconditionalCheckedCastAddrInst>(inst))
     return isActiveValue(uccai->getDest());

   // Should differentiate any allocation instruction that has an active result.
   if (isa<AllocationInst>(inst) && anyActiveResult)
     return true;

   // The remaining rules all require an active operand.
   if (!anyActiveOperand)
     return false;
   // Reference counting, lifetime ending, access ending, or destroying on an
   // active operand.
   if (isa<RefCountingInst>(inst) || isa<EndAccessInst>(inst) ||
       isa<EndBorrowInst>(inst) || isa<DeallocationInst>(inst) ||
       isa<DestroyValueInst>(inst) || isa<DestroyAddrInst>(inst))
     return true;
   // An SSA copy of an active operand.
   return isa<CopyValueInst>(inst);
 }

 /// Takes an `apply` instruction and adds its linear map function to the
 /// linear map struct if it is active.
 ///
 /// Nothing is added when the `apply` lacks an active result or an active
 /// argument, or when a differentiation parameter or the differentiation
 /// result of the callee type is not differentiable in the derivative's module.
 ///
 /// \param context the AD context, used for its type converter.
 /// \param ai the `apply` instruction to process.
 /// \param indices the differentiation indices of the parent function.
 void LinearMapInfo::addLinearMapToStruct(ADContext &context, ApplyInst *ai,
                                          const SILAutoDiffIndices &indices) {
   SmallVector<SILValue, 4> allResults;
   SmallVector<unsigned, 8> activeParamIndices;
   SmallVector<unsigned, 8> activeResultIndices;
   collectMinimalIndicesForFunctionCall(
       ai, indices, activityInfo, allResults, activeParamIndices,
       activeResultIndices);

   // Check if there are any active results or arguments. If not, skip
   // this instruction.
   auto hasActiveResults = llvm::any_of(allResults, [&](SILValue res) {
     return activityInfo.isActive(res, indices);
   });
   auto hasActiveArguments = llvm::any_of(
       ai->getArgumentsWithoutIndirectResults(), [&](SILValue arg) {
     return activityInfo.isActive(arg, indices);
   });
   if (!hasActiveResults || !hasActiveArguments)
     return;

   // Compute differentiation result index.
   // An active result is in particular useful, so its index was recorded by
   // `collectMinimalIndicesForFunctionCall`; `front()` requires non-emptiness.
   assert(!activeResultIndices.empty() &&
          "Expected a useful result index for an `apply` with active results");
   auto source = activeResultIndices.front();
   // Compute differentiation parameters.
   // - If the callee has `@differentiable` function type, use differentiation
   //   parameters from the function type.
   // - Otherwise, use the active parameters.
   IndexSubset *parameters;
   auto origFnSubstTy = ai->getSubstCalleeType();
   auto remappedOrigFnSubstTy =
       remapTypeInDerivative(SILType::getPrimitiveObjectType(origFnSubstTy))
           .castTo<SILFunctionType>();
   if (remappedOrigFnSubstTy->isDifferentiable()) {
     parameters = remappedOrigFnSubstTy->getDifferentiationParameterIndices();
   } else {
     parameters = IndexSubset::get(
         original->getASTContext(),
         ai->getArgumentsWithoutIndirectResults().size(),
         activeParamIndices);
   }
   // Create autodiff indices for the `apply` instruction.
   SILAutoDiffIndices applyIndices(source, parameters);

   // Check for non-differentiable original function type.
   auto checkNondifferentiableOriginalFunctionType =
       [&](CanSILFunctionType origFnTy) {
         // Check non-differentiable arguments.
         for (unsigned paramIndex : range(origFnTy->getNumParameters())) {
           auto remappedParamType =
               origFnTy->getParameters()[paramIndex].getSILStorageType();
           if (applyIndices.isWrtParameter(paramIndex) &&
               !remappedParamType.isDifferentiable(derivative->getModule()))
             return true;
         }
         // Check non-differentiable results.
         auto remappedResultType =
             origFnTy->getResults()[applyIndices.source].getSILStorageType();
         if (!remappedResultType.isDifferentiable(derivative->getModule()))
           return true;
         return false;
       };
   if (checkNondifferentiableOriginalFunctionType(remappedOrigFnSubstTy))
     return;

   AutoDiffDerivativeFunctionKind derivativeFnKind(kind);
   auto derivativeFnType =
       remappedOrigFnSubstTy->getAutoDiffDerivativeFunctionType(
           parameters, source, derivativeFnKind, context.getTypeConverter(),
           LookUpConformanceInModule(derivative->getModule().getSwiftModule()));

   // The linear map is the last element of the derivative function's result
   // tuple.
   auto derivativeFnResultTypes =
       derivativeFnType->getAllResultsType().castTo<TupleType>();
   auto linearMapSILType = SILType::getPrimitiveObjectType(
       derivativeFnResultTypes
           ->getElement(derivativeFnResultTypes->getElements().size() - 1)
           .getType()
           ->getCanonicalType());
   addLinearMapDecl(ai, linearMapSILType);
 }

 /// Generates the per-basic-block declarations used to differentiate
 /// `original`:
 /// - a linear map struct for every original basic block,
 /// - a branching trace enum for every original basic block, stored as a field
 ///   of the block's linear map struct (except for the entry block),
 /// - a linear map field for every `apply` that should be differentiated.
 /// In debug builds the generated declarations are printed to the AD debug
 /// stream.
 void LinearMapInfo::generateDifferentiationDataStructures(
     ADContext &context, const SILAutoDiffIndices &indices,
     SILFunction *derivativeFn) {
   auto &astCtx = original->getASTContext();
   auto *loopInfo =
       context.getPassManager().getAnalysis<SILLoopAnalysis>()->get(original);

   // Canonical generic signature of the derivative function; stays null when
   // the derivative function has no generic environment.
   CanGenericSignature derivativeFnGenSig = nullptr;
   if (auto *genericEnv = derivativeFn->getGenericEnvironment())
     derivativeFnGenSig =
         genericEnv->getGenericSignature()->getCanonicalSignature();

   // Step 1: one linear map struct per original block.
   for (auto &origBB : *original) {
     auto *structDecl =
         createLinearMapStruct(&origBB, indices, derivativeFnGenSig);
     linearMapStructs.insert({&origBB, structDecl});
   }

   // Step 2: one branching trace enum per original block, added as a field of
   // the corresponding struct. The field name depends on the linear map kind.
   StringRef traceEnumFieldName;
   switch (kind) {
   case AutoDiffLinearMapKind::Differential:
     traceEnumFieldName = "successor";
     break;
   case AutoDiffLinearMapKind::Pullback:
     traceEnumFieldName = "predecessor";
     break;
   }
   for (auto &origBB : *original) {
     auto *enumDecl = createBranchingTraceDecl(&origBB, indices,
                                               derivativeFnGenSig, loopInfo);
     branchingTraceDecls.insert({&origBB, enumDecl});
     // The entry block's struct gets no branching trace enum field.
     if (origBB.isEntry())
       continue;
     auto *structDecl = getLinearMapStruct(&origBB);
     auto *enumField =
         addVarDecl(structDecl,
                    astCtx.getIdentifier(traceEnumFieldName).str(),
                    enumDecl->getDeclaredInterfaceType());
     linearMapStructEnumFields.insert({structDecl, enumField});
   }

   // Returns true if `ai` has an `inout` argument that is active. Functions
   // with active inout arguments are not yet supported.
   auto hasActiveInoutArgument = [&](ApplyInst *ai) {
     auto paramInfos = ai->getSubstCalleeConv().getParameters();
     auto args = ai->getArgumentsWithoutIndirectResults();
     for (unsigned paramIdx : swift::indices(paramInfos))
       if (paramInfos[paramIdx].isIndirectInOut() &&
           activityInfo.isActive(args[paramIdx], indices))
         return true;
     return false;
   };

   // Step 3: add linear map fields to the linear map structs.
   for (auto &origBB : *original) {
     for (auto &inst : origBB) {
       auto *ai = dyn_cast<ApplyInst>(&inst);
       if (!ai)
         continue;
       if (hasActiveInoutArgument(ai))
         continue;
       // Only active `apply` instructions get a linear map field.
       if (!shouldDifferentiateApplyInst(ai))
         continue;
       // Skip array literal intrinsic applications since array literal
       // initialization is linear and handled separately.
       if (isArrayLiteralIntrinsic(ai))
         continue;

       LLVM_DEBUG(getADDebugStream() << "Adding linear map struct field for "
                                     << *ai);
       addLinearMapToStruct(context, ai, indices);
     }
   }

   // Print generated linear map structs and branching trace enums.
   // These declarations do not show up with `-emit-sil` because they are
   // implicit. Instead, use `-Xllvm -debug-only=differentiation` to test
   // declarations with FileCheck.
   LLVM_DEBUG({
     auto &s = getADDebugStream();
     PrintOptions printOptions;
     printOptions.TypeDefinitions = true;
     printOptions.ExplodePatternBindingDecls = true;
     printOptions.SkipImplicit = false;
     s << "Generated linear map structs and branching trace enums for @"
       << original->getName() << ":\n";
     for (auto &origBB : *original) {
       auto *structDecl = getLinearMapStruct(&origBB);
       structDecl->print(s, printOptions);
       s << '\n';
     }
     for (auto &origBB : *original) {
       auto *enumDecl = getBranchingTraceDecl(&origBB);
       enumDecl->print(s, printOptions);
       s << '\n';
     }
   });
 }

 /// Holds the activity analysis results for a single function, keyed by the
 /// generic signature of the derivative being generated, together with the
 /// dominance information the analysis needs.
 class DifferentiableActivityCollection {
 public:
   /// Activity info computed so far, one entry per generic signature.
   SmallDenseMap<GenericSignature, DifferentiableActivityInfo> activityInfoMap;
   /// The function being analyzed.
   SILFunction &function;
   /// Dominance info handed to each activity analysis run.
   DominanceInfo *domInfo;
   /// Post-dominance info handed to each activity analysis run.
   PostDominanceInfo *postDomInfo;

   /// Returns the activity info for `assocGenSig`, creating and caching it
   /// on first request. `kind` is not consulted by this implementation.
   DifferentiableActivityInfo &getActivityInfo(
       GenericSignature assocGenSig, AutoDiffDerivativeFunctionKind kind) {
     auto cachedEntry = activityInfoMap.find(assocGenSig);
     if (cachedEntry == activityInfoMap.end()) {
       // Not analyzed yet for this signature: construct (which runs the
       // analysis) and remember the result.
       cachedEntry =
           activityInfoMap
               .insert({assocGenSig,
                        DifferentiableActivityInfo(*this, assocGenSig)})
               .first;
     }
     return cachedEntry->getSecond();
   }

   explicit DifferentiableActivityCollection(SILFunction &f,
                                             DominanceInfo *di,
                                             PostDominanceInfo *pdi);
 };

 } // end anonymous namespace

 /// Creates a fresh activity collection for `f`, wired up with the dominance
 /// and post-dominance info of `f`. Both underlying analyses must already be
 /// set.
 std::unique_ptr<DifferentiableActivityCollection>
 DifferentiableActivityAnalysis::newFunctionAnalysis(SILFunction *f) {
   assert(dominanceAnalysis && "Expect a valid dominance anaysis");
   assert(postDominanceAnalysis && "Expect a valid post-dominance anaysis");
   auto *functionDomInfo = dominanceAnalysis->get(f);
   auto *functionPostDomInfo = postDominanceAnalysis->get(f);
   return llvm::make_unique<DifferentiableActivityCollection>(
       *f, functionDomInfo, functionPostDomInfo);
 }

 /// Looks up the dominance and post-dominance analyses from the pass manager
 /// and stores them for later use by `newFunctionAnalysis`.
 void DifferentiableActivityAnalysis::initialize(SILPassManager *pm) {
   auto *domAnalysis = pm->getAnalysis<DominanceAnalysis>();
   dominanceAnalysis = domAnalysis;
   auto *postDomAnalysis = pm->getAnalysis<PostDominanceAnalysis>();
   postDominanceAnalysis = postDomAnalysis;
 }

 /// Factory for the differentiable activity analysis. The module argument is
 /// not used by this implementation; the caller receives a newly allocated
 /// analysis object.
 SILAnalysis *swift::createDifferentiableActivityAnalysis(SILModule *m) {
   SILAnalysis *analysis = new DifferentiableActivityAnalysis();
   return analysis;
 }

 /// Records the function to analyze together with its dominance and
 /// post-dominance info. No analysis is run here; activity info is created
 /// lazily by `getActivityInfo`.
 DifferentiableActivityCollection::DifferentiableActivityCollection(
     SILFunction &f, DominanceInfo *di, PostDominanceInfo *pdi)
     : function(f), domInfo(di), postDomInfo(pdi) {}

 /// Creates the activity info for the function owned by `parent` under the
 /// given derivative generic signature and runs the activity analysis right
 /// away, using the dominance and post-dominance info stored in `parent`.
 DifferentiableActivityInfo::DifferentiableActivityInfo(
     DifferentiableActivityCollection &parent, GenericSignature derivGenSig)
     : parent(parent), derivativeGenericSignature(derivGenSig) {
   DominanceInfo *di = parent.domInfo;
   PostDominanceInfo *pdi = parent.postDomInfo;
   analyze(di, pdi);
 }

 /// Returns the function this activity info describes, i.e. the function held
 /// by the parent collection.
 SILFunction &DifferentiableActivityInfo::getFunction() {
   SILFunction &analyzedFunction = parent.function;
   return analyzedFunction;
 }

 /// Runs activity analysis on the function returned by `getFunction()`.
 ///
 /// The analysis has two phases:
 /// 1. Variedness: every parameter argument is an independent variable. Each
 ///    is marked varied in its own set, and variedness is propagated forward
 ///    through the function, visiting blocks in dominance order.
 /// 2. Usefulness: every formal result is a dependent variable. Each is marked
 ///    useful in its own set, and usefulness is propagated backward, visiting
 ///    blocks in post-dominance order starting from the return block.
 ///
 /// \param di dominance info used to order the forward traversal.
 /// \param pdi post-dominance info used to order the backward traversal.
 void DifferentiableActivityInfo::analyze(DominanceInfo *di,
                                          PostDominanceInfo *pdi) {
   auto &function = getFunction();
   LLVM_DEBUG(getADDebugStream()
              << "Running activity analysis on @" << function.getName() << '\n');
   // Inputs are just function's arguments, count `n`.
   auto paramArgs = function.getArgumentsWithoutIndirectResults();
   for (auto value : paramArgs)
     inputValues.push_back(value);
   LLVM_DEBUG({
     auto &s = getADDebugStream();
     s << "Inputs in @" << function.getName() << ":\n";
     for (auto val : inputValues)
       s << val << '\n';
   });
   // Outputs are indirect result buffers and return values, count `m`.
   collectAllFormalResultsInTypeOrder(function, outputValues);
   LLVM_DEBUG({
     auto &s = getADDebugStream();
     s << "Outputs in @" << function.getName() << ":\n";
     for (auto val : outputValues)
       s << val << '\n';
   });

   // Mark inputs as varied. Each input gets its own varied set, so the set
   // index equals the independent variable index.
   assert(variedValueSets.empty());
   for (auto input : inputValues)
     variedValueSets.push_back({input});
   // Propagate varied-ness through the function in dominance order.
   DominanceOrder domOrder(function.getEntryBlock(), di);
   while (auto *bb = domOrder.getNext()) {
     for (auto &inst : *bb) {
       // `i` is the independent variable (input) index; every instruction is
       // processed once per input.
       for (auto i : indices(inputValues)) {
         // Handle `apply`.
         if (auto *ai = dyn_cast<ApplyInst>(&inst)) {
           // If callee is non-varying, skip.
           if (isWithoutDerivative(ai->getCallee()))
             continue;
           // If any argument is varied, set all direct and indirect results as
           // varied.
           for (auto arg : ai->getArgumentsWithoutIndirectResults()) {
             if (isVaried(arg, i)) {
               for (auto indRes : ai->getIndirectSILResults())
                 setVaried(indRes, i);
               forEachApplyDirectResult(ai, [&](SILValue directResult) {
                 setVaried(directResult, i);
               });
             }
           }
         }
         // Handle store-like instructions:
         //   `store`, `store_borrow`, `copy_addr`,
         //   `unconditional_checked_cast_addr`
         // A varied source makes the destination (and the values it is
         // projected from) varied.
 #define PROPAGATE_VARIED_THROUGH_STORE(INST) \
         else if (auto *si = dyn_cast<INST##Inst>(&inst)) { \
           if (isVaried(si->getSrc(), i)) \
             propagateVariedInwardsThroughProjections(si->getDest(), i); \
         }
         PROPAGATE_VARIED_THROUGH_STORE(Store)
         PROPAGATE_VARIED_THROUGH_STORE(StoreBorrow)
         PROPAGATE_VARIED_THROUGH_STORE(CopyAddr)
         PROPAGATE_VARIED_THROUGH_STORE(UnconditionalCheckedCastAddr)
 #undef PROPAGATE_VARIED_THROUGH_STORE
         // Handle `tuple_element_addr`. The projected element is only marked
         // varied if its type has an associated tangent space.
         else if (auto *teai = dyn_cast<TupleElementAddrInst>(&inst)) {
           if (isVaried(teai->getOperand(), i)) {
             auto projType = teai->getType().getASTType();
             // Archetypes are mapped out of context and canonicalized in the
             // derivative generic signature before the tangent space lookup.
             if (derivativeGenericSignature && projType->hasArchetype())
               projType = derivativeGenericSignature->getCanonicalTypeInContext(
                   projType->mapTypeOutOfContext());
             if (projType->getAutoDiffAssociatedTangentSpace(
                     getLookupConformanceFunction()))
               setVaried(teai, i);
           }
         }
         // Handle `struct_extract` and `struct_element_addr` instructions.
         // - If the field is marked `@noDerivative`, do not set the result as
         // varied because it is not in the set of differentiable variables.
         // - Otherwise, propagate variedness from operand to result as usual.
 #define PROPAGATE_VARIED_FOR_STRUCT_EXTRACTION(INST) \
         else if (auto *sei = dyn_cast<INST##Inst>(&inst)) { \
           if (isVaried(sei->getOperand(), i) && \
               !sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>()) \
             setVaried(sei, i); \
         }
         PROPAGATE_VARIED_FOR_STRUCT_EXTRACTION(StructExtract)
         PROPAGATE_VARIED_FOR_STRUCT_EXTRACTION(StructElementAddr)
 #undef PROPAGATE_VARIED_FOR_STRUCT_EXTRACTION
         // Handle `br`: a varied branch operand makes the matching block
         // argument varied.
         else if (auto *bi = dyn_cast<BranchInst>(&inst)) {
           for (auto &op : bi->getAllOperands())
             if (isVaried(op.get(), i))
               setVaried(bi->getArgForOperand(&op), i);
         }
         // Handle `cond_br`: same as `br`, separately for the true and false
         // destinations.
         else if (auto *cbi = dyn_cast<CondBranchInst>(&inst)) {
           for (unsigned opIdx : indices(cbi->getTrueOperands())) {
             auto &op = cbi->getTrueOperands()[opIdx];
             if (isVaried(op.get(), i))
               setVaried(cbi->getTrueBB()->getArgument(opIdx), i);
           }
           for (unsigned opIdx : indices(cbi->getFalseOperands())) {
             auto &op = cbi->getFalseOperands()[opIdx];
             if (isVaried(op.get(), i))
               setVaried(cbi->getFalseBB()->getArgument(opIdx), i);
           }
         }
         // Handle `switch_enum`: a varied enum operand makes every argument of
         // every successor block varied.
         else if (auto *sei = dyn_cast<SwitchEnumInst>(&inst)) {
           if (isVaried(sei->getOperand(), i))
             for (auto *succBB : sei->getSuccessorBlocks())
               for (auto *arg : succBB->getArguments())
                 setVaried(arg, i);
         }
         // Handle everything else: any varied operand makes all results
         // varied.
         else {
           for (auto &op : inst.getAllOperands())
             if (isVaried(op.get(), i))
               for (auto result : inst.getResults())
                 setVaried(result, i);
         }
       }
     }
     domOrder.pushChildren(bb);
   }

   // Mark differentiable outputs as useful. Each output gets its own useful
   // set, so the set index equals the dependent variable index.
   assert(usefulValueSets.empty());
   for (auto output : outputValues) {
     usefulValueSets.push_back({});
     // If the output has an address or class type, propagate usefulness
     // recursively.
     if (output->getType().isAddress() ||
         output->getType().isClassOrClassMetatype())
       propagateUsefulThroughBuffer(output, usefulValueSets.size() - 1);
     // Otherwise, just mark the output as useful.
     else
       setUseful(output, usefulValueSets.size() - 1);
   }
   // Propagate usefulness through the function in post-dominance order.
   // NOTE(review): the result of `findReturnBB()` is dereferenced
   // unconditionally, so this presumably expects the function to have a
   // return block -- confirm against callers.
   PostDominanceOrder postDomOrder(&*function.findReturnBB(), pdi);
   while (auto *bb = postDomOrder.getNext()) {
     // Instructions are visited bottom-up within each block.
     for (auto &inst : reversed(*bb)) {
       // `i` is the dependent variable (output) index.
       for (auto i : indices(outputValues)) {
         // Handle indirect results in `apply`.
         if (auto *ai = dyn_cast<ApplyInst>(&inst)) {
           if (isWithoutDerivative(ai->getCallee()))
             continue;
           // If `res` is useful, all parameter arguments become useful. The
           // `i` captured here is the output index of the enclosing loop.
           auto checkAndSetUseful = [&](SILValue res) {
             if (isUseful(res, i))
               for (auto arg : ai->getArgumentsWithoutIndirectResults())
                 setUseful(arg, i);
           };
           for (auto dirRes : ai->getResults())
             checkAndSetUseful(dirRes);
           for (auto indRes : ai->getIndirectSILResults())
             checkAndSetUseful(indRes);
           auto paramInfos = ai->getSubstCalleeConv().getParameters();
           // NOTE: this `i` is a parameter index that shadows the output
           // index `i` of the enclosing loop. `checkAndSetUseful` was defined
           // before this loop and still refers to the output index.
           for (auto i : indices(paramInfos))
             if (paramInfos[i].isIndirectInOut())
               checkAndSetUseful(ai->getArgumentsWithoutIndirectResults()[i]);
         }
         // Handle store-like instructions:
         //   `store`, `store_borrow`, `copy_addr`,
         //   `unconditional_checked_cast_addr`
         // A useful destination makes the source useful.
 #define PROPAGATE_USEFUL_THROUGH_STORE(INST, PROPAGATE) \
         else if (auto *si = dyn_cast<INST##Inst>(&inst)) { \
           if (isUseful(si->getDest(), i)) \
             PROPAGATE(si->getSrc(), i); \
         }
         PROPAGATE_USEFUL_THROUGH_STORE(Store, setUseful)
         PROPAGATE_USEFUL_THROUGH_STORE(StoreBorrow, setUseful)
         PROPAGATE_USEFUL_THROUGH_STORE(CopyAddr, propagateUsefulThroughBuffer)
         PROPAGATE_USEFUL_THROUGH_STORE(UnconditionalCheckedCastAddr,
                                        propagateUsefulThroughBuffer)
 #undef PROPAGATE_USEFUL_THROUGH_STORE
         // Handle struct element extraction, skipping `@noDerivative` fields:
         //   `struct_extract`, `struct_element_addr`.
 #define PROPAGATE_USEFUL_THROUGH_STRUCT_EXTRACTION(INST, PROPAGATE) \
         else if (auto *sei = dyn_cast<INST##Inst>(&inst)) { \
           if (isUseful(sei, i)) { \
             auto hasNoDeriv = sei->getField()->getAttrs() \
                 .hasAttribute<NoDerivativeAttr>(); \
             if (!hasNoDeriv) \
               PROPAGATE(sei->getOperand(), i); \
           } \
         }
         PROPAGATE_USEFUL_THROUGH_STRUCT_EXTRACTION(StructExtract, setUseful)
         PROPAGATE_USEFUL_THROUGH_STRUCT_EXTRACTION(StructElementAddr,
                                                    propagateUsefulThroughBuffer)
 #undef PROPAGATE_USEFUL_THROUGH_STRUCT_EXTRACTION
         // Handle everything else: if any result is useful, every operand
         // becomes useful. Address operands are additionally propagated
         // through their buffer.
         else if (llvm::any_of(inst.getResults(),
           [&](SILValue res) { return isUseful(res, i); })) {
           for (auto &op : inst.getAllOperands()) {
             auto value = op.get();
             if (value->getType().isAddress())
               propagateUsefulThroughBuffer(value, i);
             setUseful(value, i);
           }
         }
       }
     }
     // Propagate usefulness from basic block arguments to incoming phi values.
     for (auto i : indices(outputValues)) {
       for (auto *arg : bb->getArguments()) {
         if (isUseful(arg, i)) {
           SmallVector<SILValue, 4> incomingValues;
           arg->getSingleTerminatorOperands(incomingValues);
           for (auto incomingValue : incomingValues)
             setUseful(incomingValue, i);
         }
       }
     }
     postDomOrder.pushChildren(bb);
   }
 }

 /// If `value` comes from the array-allocation intrinsic, marks the array
 /// produced by every `destructure_tuple` user of `value` as varied for the
 /// given independent variable. Does nothing for any other value.
 void DifferentiableActivityInfo::setVariedAcrossArrayInitialization(
     SILValue value, unsigned independentVariableIndex) {
   if (!getAllocateUninitializedArrayIntrinsic(value))
     return;
   for (auto valueUse : value->getUses()) {
     auto *destructure = dyn_cast<DestructureTupleInst>(valueUse->getUser());
     if (!destructure)
       continue;
     // The first tuple field of the intrinsic's return value is the array.
     setVaried(destructure->getResult(0), independentVariableIndex);
   }
 }

 /// If `value` comes from the array-allocation intrinsic, marks the sources of
 /// the `store`s that initialize the array's elements as useful for the given
 /// dependent variable. Does nothing for any other value.
 ///
 /// Array initializer syntax is lowered to an intrinsic and one or more
 /// stores to a `RawPointer` returned by the intrinsic.
 void DifferentiableActivityInfo::setUsefulAcrossArrayInitialization(
     SILValue value, unsigned dependentVariableIndex) {
   if (!getAllocateUninitializedArrayIntrinsic(value))
     return;
   for (auto tupleUse : value->getUses()) {
     auto *dti = dyn_cast<DestructureTupleInst>(tupleUse->getUser());
     if (!dti)
       continue;
     // The second tuple field of the return value is the `RawPointer`.
     for (auto rawPointerUse : dti->getResult(1)->getUses()) {
       // The `RawPointer` passes through a `pointer_to_address`. That
       // instruction's first use is a `store` whose src is useful; its
       // subsequent uses are `index_addr`s whose only use is a useful `store`.
       auto *rawPointerUser = rawPointerUse->getUser();
       // A user without results has no address to follow; skip it rather than
       // indexing past the end of its result list.
       if (rawPointerUser->getResults().empty())
         continue;
       for (auto addressUse : rawPointerUser->getResult(0)->getUses()) {
         auto *addressUser = addressUse->getUser();
         if (auto *si = dyn_cast<StoreInst>(addressUser)) {
           setUseful(si->getSrc(), dependentVariableIndex);
         } else if (auto *iai = dyn_cast<IndexAddrInst>(addressUser)) {
           for (auto indexedUse : iai->getUses())
             if (auto *si = dyn_cast<StoreInst>(indexedUse->getUser()))
               setUseful(si->getSrc(), dependentVariableIndex);
         }
       }
     }
   }
 }

 /// Records `value` as varied with respect to the given independent variable,
 /// then extends variedness across array initialization where applicable.
 void DifferentiableActivityInfo::setVaried(SILValue value,
                                            unsigned independentVariableIndex) {
   auto &variedSet = variedValueSets[independentVariableIndex];
   variedSet.insert(value);
   setVariedAcrossArrayInitialization(value, independentVariableIndex);
 }

 /// Records `value` as useful for the given dependent variable, then extends
 /// usefulness across array initialization where applicable.
 void DifferentiableActivityInfo::setUseful(SILValue value,
                                            unsigned dependentVariableIndex) {
   auto &usefulSet = usefulValueSets[dependentVariableIndex];
   usefulSet.insert(value);
   setUsefulAcrossArrayInitialization(value, dependentVariableIndex);
 }

 /// Marks `value` as varied and recursively does the same for the operands of
 /// its defining instruction. The recursion stops at values without a defining
 /// instruction and at `apply` results. A `struct_extract` or
 /// `struct_element_addr` of a `@noDerivative` field is left untouched.
 void DifferentiableActivityInfo::propagateVariedInwardsThroughProjections(
     SILValue value, unsigned independentVariableIndex) {
   // Projections of `@noDerivative` fields are not differentiable variables.
   if (auto *extract = dyn_cast<StructExtractInst>(value))
     if (extract->getField()->getAttrs().hasAttribute<NoDerivativeAttr>())
       return;
   if (auto *elementAddr = dyn_cast<StructElementAddrInst>(value))
     if (elementAddr->getField()->getAttrs().hasAttribute<NoDerivativeAttr>())
       return;

   setVaried(value, independentVariableIndex);

   auto *definingInst = value->getDefiningInstruction();
   if (!definingInst)
     return;
   if (isa<ApplyInst>(definingInst))
     return;
   // Standard propagation.
   for (auto &operand : definingInst->getAllOperands()) {
     propagateVariedInwardsThroughProjections(operand.get(),
                                              independentVariableIndex);
   }
 }

 /// Marks the address- or class-typed `value` as useful and spreads usefulness
 /// in both directions: to the address-typed operands of its defining
 /// instruction, and to results of its users that are address projections or
 /// `begin_access` instructions. Projections of `@noDerivative` struct fields
 /// are skipped.
 void DifferentiableActivityInfo::propagateUsefulThroughBuffer(
     SILValue value, unsigned dependentVariableIndex) {
   assert(value->getType().isAddress() ||
          value->getType().isClassOrClassMetatype());
   // Check whether value is already useful to prevent infinite recursion.
   if (isUseful(value, dependentVariableIndex))
     return;
   setUseful(value, dependentVariableIndex);

   // Towards the definition: follow address-typed operands.
   if (auto *definingInst = value->getDefiningInstruction()) {
     for (auto &operand : definingInst->getAllOperands()) {
       SILValue operandValue = operand.get();
       if (operandValue->getType().isAddress())
         propagateUsefulThroughBuffer(operandValue, dependentVariableIndex);
     }
   }

   // Towards the uses: recursively propagate usefulness through users that are
   // projections or `begin_access` instructions.
   for (auto valueUse : value->getUses()) {
     for (auto userResult : valueUse->getUser()->getResults()) {
       if (auto *extract = dyn_cast<StructExtractInst>(userResult))
         if (extract->getField()->getAttrs().hasAttribute<NoDerivativeAttr>())
           continue;
       if (auto *elementAddr = dyn_cast<StructElementAddrInst>(userResult))
         if (elementAddr->getField()->getAttrs()
                 .hasAttribute<NoDerivativeAttr>())
           continue;
       if (Projection::isAddressProjection(userResult) ||
           isa<BeginAccessInst>(userResult))
         propagateUsefulThroughBuffer(userResult, dependentVariableIndex);
     }
   }
 }

 /// Returns true if `value` has been recorded as varied with respect to the
 /// independent variable at `independentVariableIndex`.
 bool DifferentiableActivityInfo::isVaried(
     SILValue value, unsigned independentVariableIndex) const {
   assert(independentVariableIndex < variedValueSets.size() &&
          "Independent variable index out of range");
   return variedValueSets[independentVariableIndex].count(value) != 0;
 }

 /// Returns true if `value` is varied with respect to at least one of the
 /// parameter indices in `parameterIndices`.
 bool DifferentiableActivityInfo::isVaried(
     SILValue value, IndexSubset *parameterIndices) const {
   for (auto index : parameterIndices->getIndices()) {
     if (!isVaried(value, index))
       continue;
     return true;
   }
   return false;
 }

 /// Returns true if `value` is in the useful set recorded for the dependent
 /// variable at `dependentVariableIndex`.
 bool DifferentiableActivityInfo::isUseful(
     SILValue value, unsigned dependentVariableIndex) const {
   assert(dependentVariableIndex < usefulValueSets.size() &&
          "Dependent variable index out of range");
   const auto &usefulValues = usefulValueSets[dependentVariableIndex];
   return usefulValues.count(value) != 0;
 }

 /// Returns true if `value` is both varied (with respect to
 /// `indices.parameters`) and useful (with respect to `indices.source`).
 bool DifferentiableActivityInfo::isActive(
     SILValue value, const SILAutoDiffIndices &indices) const {
   if (!isVaried(value, indices.parameters))
     return false;
   return isUseful(value, indices.source);
 }

 /// Computes the activity flags of `value`: `Varied` if it is varied with
 /// respect to `indices.parameters`, `Useful` if it is useful with respect to
 /// `indices.source`.
 Activity DifferentiableActivityInfo::getActivity(
     SILValue value, const SILAutoDiffIndices &indices) const {
   Activity flags;
   const bool varied = isVaried(value, indices.parameters);
   if (varied)
     flags |= ActivityFlags::Varied;
   const bool useful = isUseful(value, indices.source);
   if (useful)
     flags |= ActivityFlags::Useful;
   return flags;
 }

 /// Computes the activity flags of `inst` as the union of the activity flags
 /// of all of its results.
 Activity DifferentiableActivityInfo::getActivity(
     SILInstruction *inst, const SILAutoDiffIndices &indices) const {
   Activity combined;
   for (auto result : inst->getResults()) {
     combined |= getActivity(result, indices);
   }
   return combined;
 }

 /// Prints the activity of `value` at `indices` to `s` as a bracketed tag
 /// (`NONE`, `VARIED`, `USEFUL` or `ACTIVE`) followed by the value itself.
 static void dumpActivityInfo(SILValue value,
                              const SILAutoDiffIndices &indices,
                              const DifferentiableActivityInfo &activityInfo,
                              llvm::raw_ostream &s = llvm::dbgs()) {
   s << '[';
   const auto rawActivity = activityInfo.getActivity(value, indices).toRaw();
   // Any raw value outside the four known combinations prints no tag.
   if (rawActivity == 0)
     s << "NONE";
   else if (rawActivity == (unsigned)ActivityFlags::Varied)
     s << "VARIED";
   else if (rawActivity == (unsigned)ActivityFlags::Useful)
     s << "USEFUL";
   else if (rawActivity == (unsigned)ActivityFlags::Active)
     s << "ACTIVE";
   s << "] " << value;
 }

 /// Prints activity info at `indices` for every basic block argument and every
 /// instruction result in `fn`, grouped by basic block.
 static void dumpActivityInfo(SILFunction &fn,
                              const SILAutoDiffIndices &indices,
                              const DifferentiableActivityInfo &activityInfo,
                              llvm::raw_ostream &s = llvm::dbgs()) {
   s << "Activity info for " << fn.getName() << " at " << indices << '\n';
   for (auto &block : fn) {
     s << "bb" << block.getDebugID() << ":\n";
     // Block arguments first, then the results of each instruction in order.
     for (auto *blockArg : block.getArguments()) {
       dumpActivityInfo(blockArg, indices, activityInfo, s);
     }
     for (auto &instruction : block) {
       for (auto result : instruction.getResults()) {
         dumpActivityInfo(result, indices, activityInfo, s);
       }
     }
     s << '\n';
   }
 }

 /// A function without a return block cannot be differentiated. Emits a
 /// "missing return" error at the end of `original`'s source location in that
 /// case. Returns true if an error was emitted.
 static bool diagnoseNoReturn(ADContext &context, SILFunction *original,
                              DifferentiationInvoker invoker) {
   const bool hasReturnBlock = original->findReturnBB() != original->end();
   if (hasReturnBlock)
     return false;
   auto errorLoc = original->getLocation().getEndSourceLoc();
   context.emitNondifferentiabilityError(errorLoc, invoker,
                                         diag::autodiff_missing_return);
   return true;
 }

 /// Emits a "control flow unsupported" error at the first unsupported
 /// branching terminator found in the original function. Returns true if an
 /// error was emitted.
 ///
 /// Update as control flow support is added. Currently, branching terminators
 /// other than `br`, `cond_br`, `switch_enum` are not supported.
 static bool diagnoseUnsupportedControlFlow(ADContext &context,
                                            SILFunction *original,
                                            DifferentiationInvoker invoker) {
   // Functions with at most one basic block are not inspected.
   if (original->getBlocks().size() <= 1)
     return false;
   for (auto &block : *original) {
     auto *terminator = block.getTerminator();
     // `br`, `cond_br` and `switch_enum` are the supported terminators.
     const bool isSupported = isa<BranchInst>(terminator) ||
                              isa<CondBranchInst>(terminator) ||
                              isa<SwitchEnumInst>(terminator);
     // Non-branching terminators are not diagnosed here.
     if (isSupported || !terminator->isBranch())
       continue;
     context.emitNondifferentiabilityError(
         terminator, invoker, diag::autodiff_control_flow_not_supported);
     return true;
   }
   return false;
 }

 /// Check whether the requirements of the given derivative generic signature
 /// are satisfied under the given substitution map. Returns true if error is
 /// emitted.
 ///
 /// Each requirement's types are substituted with `substMap` (falling back to
 /// the unsubstituted type when substitution yields a null type) and then
 /// checked according to the requirement kind. All unsatisfied requirements are
 /// collected and reported in a single diagnostic at `loc`.
 ///
 /// Returns false without emitting anything when `derivativeGenSig` is null or
 /// has no requirements.
 static bool diagnoseUnsatisfiedRequirements(ADContext &context,
                                             GenericSignature derivativeGenSig,
                                             SILFunction *original,
                                             SubstitutionMap substMap,
                                             DifferentiationInvoker invoker,
                                             SourceLoc loc) {
   // If there are no derivative requirements, return false.
   if (!derivativeGenSig)
     return false;
   auto requirements = derivativeGenSig->getRequirements();
   if (requirements.empty())
     return false;
   // Iterate through all requirements and check whether they are satisfied.
   auto *swiftModule = context.getModule().getSwiftModule();
   SmallVector<Requirement, 2> unsatisfiedRequirements;
   for (auto req : requirements) {
     auto firstType = req.getFirstType();
     Type secondType;
     // Substitute first and second types using the given substitution map,
     // looking up conformances in the current module, if possible.
     if (auto substFirstType =
             firstType.subst(QuerySubstitutionMap{substMap},
                             LookUpConformanceInModule(swiftModule))) {
       firstType = substFirstType;
     }
     // Layout requirements are not asked for a second type; `secondType` stays
     // null for them.
     if (req.getKind() != RequirementKind::Layout) {
       secondType = req.getSecondType();
       if (auto substSecondType =
               secondType.subst(QuerySubstitutionMap{substMap},
                                LookUpConformanceInModule(swiftModule))) {
         secondType = substSecondType;
       }
     }
     // Every case below ends with `continue`, which advances the enclosing
     // `for` loop to the next requirement.
     switch (req.getKind()) {
     // Check layout requirements.
     case RequirementKind::Layout: {
       auto layout = req.getLayoutConstraint();
       switch (layout->getKind()) {
       case LayoutConstraintKind::Class:
         if (!firstType->satisfiesClassConstraint())
           unsatisfiedRequirements.push_back(req);
         continue;
       default:
         // TODO: Check other layout requirements. Note that `@differentiable`
         // attribute type-checking does not yet support layout requirements in
         // where clauses; layout requirements in derivative generic signatures
         // can be formed only from `differentiable_function` instructions whose
         // original function operand is generic with layout requirements.
         break;
       }
       continue;
     }
     // Check same type requirements.
     case RequirementKind::SameType:
       // If the first type does not equal the second type, then record the
       // unsatisfied requirement.
       if (!firstType->isEqual(secondType))
         unsatisfiedRequirements.push_back(req);
       continue;
     // Check superclass requirements.
     case RequirementKind::Superclass: {
       // If the second type is not an exact superclass of the first type, then
       // record the unsatisfied requirement.
       if (!secondType->isExactSuperclassOf(firstType))
         unsatisfiedRequirements.push_back(req);
       continue;
     }
     // Check conformance requirements.
     case RequirementKind::Conformance: {
       // The protocol is read from the requirement's original (unsubstituted)
       // second type.
       auto protocolType = req.getSecondType()->castTo<ProtocolType>();
       auto protocol = protocolType->getDecl();
       assert(protocol && "Expected protocol in generic signature requirement");
       // If the first type does not conform to the second type in the current
       // module, then record the unsatisfied requirement.
       if (!swiftModule->lookupConformance(firstType, protocol))
         unsatisfiedRequirements.push_back(req);
       continue;
     }
     }
   }
   if (unsatisfiedRequirements.empty())
     return false;
   // Diagnose unsatisfied requirements: print them as a comma-separated list
   // and attach that text to a single error.
   std::string reqText;
   llvm::raw_string_ostream stream(reqText);
   interleave(unsatisfiedRequirements,
              [&](Requirement req) { req.print(stream, PrintOptions()); },
              [&] { stream << ", "; });
   context.emitNondifferentiabilityError(
       loc, invoker, diag::autodiff_function_assoc_func_unmet_requirements,
       stream.str());
   return true;
 }

 //===----------------------------------------------------------------------===//
 // Code emission utilities
 //===----------------------------------------------------------------------===//

 /// Appends the elements of `value` to `results`. A non-tuple value is appended
 /// as-is. A tuple value is split into its elements, using a single
 /// `destructure_tuple` when the builder has ownership and one `tuple_extract`
 /// per element otherwise.
 static void extractAllElements(SILValue value, SILBuilder &builder,
                                SmallVectorImpl<SILValue> &results) {
   auto tupleTy = value->getType().getAs<TupleType>();
   // Not a tuple: the value itself is the only element.
   if (!tupleTy) {
     results.push_back(value);
     return;
   }
   if (!builder.hasOwnership()) {
     for (auto index : range(tupleTy->getNumElements())) {
       auto element =
           builder.createTupleExtract(value.getLoc(), value, index);
       results.push_back(element);
     }
     return;
   }
   auto *destructure = builder.createDestructureTuple(value.getLoc(), value);
   results.append(destructure->getResults().begin(),
                  destructure->getResults().end());
 }

 /// Combines `elements` into a single value. A lone element is returned
 /// unchanged; any other number of elements is packed with a `tuple`
 /// instruction.
 static SILValue joinElements(ArrayRef<SILValue> elements, SILBuilder &builder,
                              SILLocation loc) {
   if (elements.size() != 1)
     return builder.createTuple(loc, elements);
   return elements.front();
 }

 /// Emits copies of all arguments of `applySite` right before it and collects
 /// them in `copiedArgs`. This helper is used for duplicating an apply site.
 ///
 /// - Object arguments are copied with a copy-value operation.
 /// - Aliasable inout address arguments are reused without copying.
 /// - All other address arguments are copied into a fresh `alloc_stack`, which
 ///   is appended to `newBuffersToDealloc`.
 ///
 /// Arguments passed with a guaranteed convention (other than aliasable inout)
 /// are also appended to `newArgsToDestroy`.
 static void copyParameterArgumentsForApply(
     ApplySite applySite, SmallVectorImpl<SILValue> &copiedArgs,
     SmallVectorImpl<SILValue> &newArgsToDestroy,
     SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc) {
   LLVM_DEBUG({
     auto &s = getADDebugStream() << "Copying arguments from apply site: ";
     applySite.getInstruction()->print(s);
   });
   auto loc = applySite.getLoc();
   copiedArgs.reserve(applySite.getNumArguments());
   SILBuilder copyBuilder(applySite.getInstruction());
   for (auto &operand : applySite.getArgumentOperands()) {
     auto originalArg = operand.get();
     auto convention = applySite.getArgumentConvention(operand);
     // Records a new argument and, when the convention calls for it, schedules
     // it for destruction by the caller.
     auto recordCopiedArg = [&](SILValue copiedArg) {
       copiedArgs.push_back(copiedArg);
       if (!convention.isGuaranteedConvention())
         return;
       if (convention == SILArgumentConvention::Indirect_InoutAliasable)
         return;
       newArgsToDestroy.push_back(copiedArg);
     };
     if (originalArg->getType().isObject()) {
       // Objects are copied (retained) so the new closure can own them.
       recordCopiedArg(copyBuilder.emitCopyValueOperation(loc, originalArg));
     } else if (convention == SILArgumentConvention::Indirect_InoutAliasable) {
       // Aliasable inout references are `@noescape` captures; pass them
       // through without copying.
       recordCopiedArg(originalArg);
     } else {
       // Any other address: allocate a new buffer and `copy_addr` into it.
       auto *buffer = copyBuilder.createAllocStack(loc, originalArg->getType());
       newBuffersToDealloc.push_back(buffer);
       copyBuilder.createCopyAddr(loc, originalArg, buffer, IsNotTake,
                                  IsInitialization);
       recordCopiedArg(buffer);
     }
   }
 }

 /// When a function value is used in an instruction (usually `apply`), there's
 /// some conversion instruction in between, e.g. `thin_to_thick_function`. Given
 /// a new function value and an old function value, this helper function
 /// recursively converts the new function just like how the old function is
 /// converted. If the new function's generic signature is specified, it is used
 /// to create substitution maps for reapplied `partial_apply` instructions.
 ///
 /// \param newFunc the new function value to convert.
 /// \param oldFunc the old, unconverted function value.
 /// \param oldConvertedFunc the old function value after conversion; the chain
 ///        of conversions from `oldFunc` to this value is what gets replayed.
 /// \param builder the builder used to emit the replayed conversions.
 /// \param loc the location given to the emitted instructions.
 /// \param newBuffersToDealloc receives any `alloc_stack` buffers created while
 ///        copying `partial_apply` arguments.
 /// \param newFuncGenSig optional generic signature of the new function.
 ///
 /// Only `thin_to_thick_function` and `partial_apply` conversions are handled;
 /// anything else hits `llvm_unreachable`.
 static SILValue
 reapplyFunctionConversion(
     SILValue newFunc, SILValue oldFunc, SILValue oldConvertedFunc,
     SILBuilder &builder, SILLocation loc,
     SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc,
     GenericSignature newFuncGenSig = GenericSignature()) {
   // If the old converted func is the old func itself, then there's no
   // conversion left to replay.
   if (oldFunc == oldConvertedFunc)
     return newFunc;
   // Handle a few instruction cases.
   // thin_to_thick_function
   if (auto *tttfi = dyn_cast<ThinToThickFunctionInst>(oldConvertedFunc)) {
     // Replay the inner conversions first, then re-thicken the result.
     auto innerNewFunc = reapplyFunctionConversion(
         newFunc, oldFunc, tttfi->getOperand(), builder, loc,
         newBuffersToDealloc, newFuncGenSig);
     auto operandFnTy = innerNewFunc->getType().castTo<SILFunctionType>();
     auto thickTy = operandFnTy->getWithRepresentation(
         SILFunctionTypeRepresentation::Thick);
     auto silTy = SILType::getPrimitiveObjectType(thickTy);
     return builder.createThinToThickFunction(loc, innerNewFunc, silTy);
   }
   // partial_apply
   if (auto *pai = dyn_cast<PartialApplyInst>(oldConvertedFunc)) {
     SmallVector<SILValue, 8> newArgs;
     newArgs.reserve(pai->getNumArguments());
     // NOTE(review): `newArgsToDestroy` is filled by the call below but is not
     // read again in this function.
     SmallVector<SILValue, 1> newArgsToDestroy;
     copyParameterArgumentsForApply(pai, newArgs, newArgsToDestroy,
                                    newBuffersToDealloc);
     auto innerNewFunc = reapplyFunctionConversion(
         newFunc, oldFunc, pai->getCallee(), builder, loc, newBuffersToDealloc,
         newFuncGenSig);
     // If new function's generic signature is specified, use it to create
     // substitution map for reapplied `partial_apply` instruction.
     auto substMap = !newFuncGenSig
         ? pai->getSubstitutionMap()
         : SubstitutionMap::get(
               newFuncGenSig, QuerySubstitutionMap{pai->getSubstitutionMap()},
               LookUpConformanceInModule(builder.getModule().getSwiftModule()));
     return builder.createPartialApply(loc, innerNewFunc, substMap, newArgs,
                                       ParameterConvention::Direct_Guaranteed);
   }
   llvm_unreachable("Unhandled function conversion instruction");
 }

 /// Emits a reference to a derivative function of `original`, differentiated
 /// with respect to a superset of `desiredIndices`. Returns the `SILValue` for
 /// the derivative function and the actual indices that the derivative function
 /// is with respect to.
 ///
 /// Returns `None` on failure, signifying that a diagnostic has been emitted.
 ///
 /// Creates new differentiation tasks, if necessary, using `invoker` as the
 /// invoker. Calls `taskCallback` for all newly-created tasks (but may also call
 /// `taskCallback` for already-existing tasks), so that the caller can make sure
 /// that the task actually gets executed.
 /// NOTE(review): this function has no `taskCallback` parameter, so the
 /// paragraph above looks stale -- confirm and update.
 ///
 /// The following sources of `original` are handled, in order:
 /// 1. a `@differentiable` function value: the derivative is extracted from it;
 /// 2. a `function_ref` (possibly behind function conversions): the JVP/VJP
 ///    named by a `[differentiable]` attribute is referenced;
 /// 3. a `witness_method`: a `witness_method` for the derivative is emitted;
 /// 4. a `class_method`: a `class_method` for the derivative is emitted.
 /// Any other source produces the general "opaque function" error.
 ///
 /// FIXME: This is too complicated and needs to be rewritten.
 static Optional<std::pair<SILValue, SILAutoDiffIndices>>
 emitDerivativeFunctionReference(
     ADContext &context, SILBuilder &builder, SILAutoDiffIndices desiredIndices,
     AutoDiffDerivativeFunctionKind kind, SILValue original,
     DifferentiationInvoker invoker,
     SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc) {

   SILValue functionSource = original;

   // If `original` is itself a `DifferentiableFunctionExtractInst` that
   // extracts the original function, look through it and use its function
   // operand as the function source.
   if (auto *inst = original->getDefiningInstruction())
     if (auto *dfei = dyn_cast<DifferentiableFunctionExtractInst>(inst))
       if (dfei->getExtractee() ==
               NormalDifferentiableFunctionTypeComponent::Original)
         functionSource = dfei->getFunctionOperand();

   // If `functionSource` is a `@differentiable` function, just extract the
   // derivative function.
   // NOTE(review): `castTo` is presumably an asserting cast, in which case the
   // outer condition is always true -- confirm.
   if (auto diffableFnType =
           functionSource->getType().castTo<SILFunctionType>()) {
     if (diffableFnType->isDifferentiable()) {
       // Every desired parameter index must be one the function value is
       // already differentiable with respect to.
       auto paramIndices = diffableFnType->getDifferentiationParameterIndices();
       for (auto i : desiredIndices.parameters->getIndices()) {
         if (!paramIndices->contains(i)) {
           context.emitNondifferentiabilityError(functionSource, invoker,
               diag::autodiff_function_nondiff_parameter_not_differentiable);
           return None;
         }
       }
       // Borrow the function value, extract and copy the derivative function,
       // then end the borrow.
       auto borrowedDiffFunc = builder.emitBeginBorrowOperation(
           functionSource.getLoc(), functionSource);
       SILValue derivativeFn = builder.createDifferentiableFunctionExtract(
           borrowedDiffFunc.getLoc(), kind, borrowedDiffFunc);
       derivativeFn =
           builder.emitCopyValueOperation(functionSource.getLoc(), derivativeFn);
       builder.emitEndBorrowOperation(functionSource.getLoc(), borrowedDiffFunc);
       SILAutoDiffIndices indices(0, desiredIndices.parameters);
       return std::make_pair(derivativeFn, indices);
     }
   }

   // Find local function reference.
   if (auto *originalFRI =
           peerThroughFunctionConversions<FunctionRefInst>(original)) {
     auto loc = originalFRI->getLoc();
     // NOTE(review): `originalFn` is used below without a null check even
     // though the accessor name suggests it may return null -- confirm.
     auto *originalFn = originalFRI->getReferencedFunctionOrNull();
     // Attempt to look up a `[differentiable]` attribute that minimally
     // satisfies the specified indices.
     // TODO(TF-482): Change `lookUpMinimalDifferentiableAttr` to additionally
     // check whether `[differentiable]` attribute generic requirements are
     // satisfied.
     auto *minimalAttr =
         context.lookUpMinimalDifferentiableAttr(originalFn, desiredIndices);
     if (!minimalAttr) {
       // If the function is intentionally marked as being opaque to
       // differentiation, then we should not create a task for it.
       if (originalFn->hasSemanticsAttr("autodiff.opaque")) {
         context.emitNondifferentiabilityError(original, invoker,
             diag::autodiff_opaque_function_not_differentiable);
         return None;
       }
       // Check and diagnose non-differentiable arguments.
       auto originalFnTy = originalFn->getLoweredFunctionType();
       for (unsigned paramIndex : range(originalFnTy->getNumParameters())) {
         if (desiredIndices.isWrtParameter(paramIndex) &&
             !originalFnTy->getParameters()[paramIndex]
                  .getSILStorageType()
                  .isDifferentiable(context.getModule())) {
           // NOTE(review): `diag` is not read after this statement.
           auto diag = context.emitNondifferentiabilityError(
               original, invoker, diag::autodiff_nondifferentiable_argument);
           return None;
         }
       }
       // Check and diagnose non-differentiable results.
       if (!originalFnTy->getResults()[desiredIndices.source]
                .getSILStorageType()
                .isDifferentiable(context.getModule())) {
         context.emitNondifferentiabilityError(
             original, invoker, diag::autodiff_nondifferentiable_result);
         return None;
       }
       // Check and diagnose external declarations.
       if (originalFn->isExternalDeclaration()) {
         context.emitNondifferentiabilityError(
             original, invoker,
             diag::autodiff_external_nondifferentiable_function);
         return None;
       }
       // Sanity check passed. Create a new `[differentiable]` attribute and
       // process it. For indirect differentiation, the new attribute uses the
       // derivative generic signature of the invoker's attribute.
       GenericSignature contextualDerivativeGenSig = GenericSignature();
       if (invoker.getKind() ==
           DifferentiationInvoker::Kind::IndirectDifferentiation)
         contextualDerivativeGenSig = invoker.getIndirectDifferentiation().second
             ->getDerivativeGenericSignature();
       auto *newAttr = context.getOrCreateDifferentiableAttr(
           originalFn, desiredIndices, contextualDerivativeGenSig);
       if (context.processDifferentiableAttribute(originalFn, newAttr, invoker))
         return None;
       minimalAttr = newAttr;
     }
     assert(minimalAttr);
     // TODO(TF-482): Move generic requirement checking logic to
     // `lookUpMinimalDifferentiableAttr`.
     // Get the substitution map for checking unmet generic requirements.
     // By default, use the forwarding substitution map of the original function.
     // If the original callee is a `partial_apply` or `apply` instruction, use
     // its substitution map instead.
     auto substMap = original->getFunction()->getForwardingSubstitutionMap();
     if (auto *pai = dyn_cast<PartialApplyInst>(original)) {
       substMap = pai->getSubstitutionMap();
     } else if (auto *ai = dyn_cast<ApplyInst>(original)) {
       substMap = ai->getSubstitutionMap();
     }
     if (diagnoseUnsatisfiedRequirements(
             context, minimalAttr->getDerivativeGenericSignature(), originalFn,
             substMap, invoker, original.getLoc().getSourceLoc()))
       return None;
     // Note: for an attribute created above, this is the second call to
     // `processDifferentiableAttribute` on the same attribute.
     if (context.processDifferentiableAttribute(
             originalFn, minimalAttr, invoker))
       return None;
     // Look up the JVP or VJP function by the name recorded on the attribute.
     SILFunction *derivativeFn = nullptr;
     switch (kind) {
     case AutoDiffDerivativeFunctionKind::JVP:
       assert(!minimalAttr->getJVPName().empty() && "Expected JVP name");
       derivativeFn = context.getModule().lookUpFunction(minimalAttr->getJVPName());
       break;
     case AutoDiffDerivativeFunctionKind::VJP:
       assert(!minimalAttr->getVJPName().empty() && "Expected VJP name");
       derivativeFn = context.getModule().lookUpFunction(minimalAttr->getVJPName());
       break;
     }
     auto *derivativeFnRef = builder.createFunctionRef(loc, derivativeFn);
     // FIXME(TF-201): Handle direct differentiation of reabstraction thunks.
     // Tentative solution: clone a new reabstraction thunk where function
     // argument has a `@differentiable` function type.
     if (originalFn->isThunk() == IsReabstractionThunk) {
       // Handle here.
     }
     // Replay the conversions between the `function_ref` and `original` on the
     // derivative function reference.
     auto convertedRef = reapplyFunctionConversion(
         derivativeFnRef, originalFRI, original, builder, loc,
         newBuffersToDealloc,
         derivativeFn->getLoweredFunctionType()->getGenericSignature());
     return std::make_pair(convertedRef, minimalAttr->getIndices());
   }

   // Find witness method retrieval.
   if (auto *witnessMethod =
           peerThroughFunctionConversions<WitnessMethodInst>(original)) {
     auto loc = witnessMethod->getLoc();
     auto requirementDeclRef = witnessMethod->getMember();
     auto *requirementDecl = requirementDeclRef.getDecl();
     auto witnessMethodType = witnessMethod->getType().castTo<SILFunctionType>();
     // If requirement declaration does not have any `@differentiable`
     // attributes, produce an error.
     if (!requirementDecl->getAttrs().hasAttribute<DifferentiableAttr>()) {
       context.emitNondifferentiabilityError(
           original, invoker, diag::autodiff_protocol_member_not_differentiable);
       return None;
     }
     // Get the minimal `@differentiable` attribute and parameter index subset.
     const DifferentiableAttr *minimalAttr;
     IndexSubset *minimalParamIndexSet;
     std::tie(minimalAttr, minimalParamIndexSet) =
         context.lookUpMinimalASTDifferentiableAttrAndIndexSubset(
             requirementDeclRef, witnessMethodType, desiredIndices);
     SILAutoDiffIndices minimalIndices(/*source*/ 0, minimalParamIndexSet);
     // If minimal `@differentiable` attribute does not exist, then no attribute
     // exists with a superset of the desired indices. Produce an error.
     if (!minimalAttr) {
       context.emitNondifferentiabilityError(
           original, invoker,
           diag::autodiff_member_subset_indices_not_differentiable);
       return None;
     }
     // Emit a `witness_method` instruction for the derivative function.
     auto originalType = witnessMethod->getType().castTo<SILFunctionType>();
     auto assocType = originalType->getAutoDiffDerivativeFunctionType(
         minimalIndices.parameters, minimalIndices.source,
         kind, context.getTypeConverter(),
         LookUpConformanceInModule(builder.getModule().getSwiftModule()));
     auto *autoDiffFuncId = AutoDiffDerivativeFunctionIdentifier::get(
         kind, minimalAttr->getParameterIndices(), context.getASTContext());
     auto *ref = builder.createWitnessMethod(
         loc, witnessMethod->getLookupType(), witnessMethod->getConformance(),
         requirementDeclRef.asAutoDiffDerivativeFunction(autoDiffFuncId),
         SILType::getPrimitiveObjectType(assocType));
     auto convertedRef =
         reapplyFunctionConversion(ref, witnessMethod, original, builder, loc,
                                   newBuffersToDealloc);
     return std::make_pair(convertedRef, minimalIndices);
   }

   // Find class method.
   if (auto *classMethodInst =
           peerThroughFunctionConversions<ClassMethodInst>(original)) {
     auto loc = classMethodInst->getLoc();
     auto methodDeclRef = classMethodInst->getMember();
     auto *methodDecl = methodDeclRef.getDecl();
     auto classMethodType = classMethodInst->getType().castTo<SILFunctionType>();
     // If method declaration does not have any `@differentiable` attributes,
     // produce an error.
     if (!methodDecl->getAttrs().hasAttribute<DifferentiableAttr>()) {
       context.emitNondifferentiabilityError(
           original, invoker, diag::autodiff_class_member_not_differentiable);
       return None;
     }
     // Get the minimal `@differentiable` attribute and parameter index subset.
     const DifferentiableAttr *minimalAttr;
     IndexSubset *minimalParamIndexSet;
     std::tie(minimalAttr, minimalParamIndexSet) =
         context.lookUpMinimalASTDifferentiableAttrAndIndexSubset(
             methodDeclRef, classMethodType, desiredIndices);
     SILAutoDiffIndices minimalIndices(/*source*/ 0, minimalParamIndexSet);
     // If minimal `@differentiable` attribute does not exist, then no attribute
     // exists with a superset of the desired indices. Produce an error.
     if (!minimalAttr) {
       context.emitNondifferentiabilityError(
           original, invoker,
           diag::autodiff_member_subset_indices_not_differentiable);
       return None;
     }
     // Emit a `class_method` instruction for the derivative function.
     auto originalType = classMethodInst->getType().castTo<SILFunctionType>();
     auto assocType = originalType->getAutoDiffDerivativeFunctionType(
         minimalIndices.parameters, minimalIndices.source,
         kind, context.getTypeConverter(),
         LookUpConformanceInModule(builder.getModule().getSwiftModule()));
     auto *autoDiffFuncId = AutoDiffDerivativeFunctionIdentifier::get(
         kind, minimalAttr->getParameterIndices(),
         context.getASTContext());
     auto *ref = builder.createClassMethod(
         loc, classMethodInst->getOperand(),
         methodDeclRef.asAutoDiffDerivativeFunction(autoDiffFuncId),
         SILType::getPrimitiveObjectType(assocType));
     auto convertedRef =
         reapplyFunctionConversion(ref, classMethodInst, original, builder, loc,
                                   newBuffersToDealloc);
     return std::make_pair(convertedRef, minimalIndices);
   }

   // Emit the general opaque function error.
   context.emitNondifferentiabilityError(original, invoker,
       diag::autodiff_opaque_function_not_differentiable);
   return None;
 }

 /// Initializes `bufferAccess` with a zero value by emitting a call to the
 /// `AdditiveArithmetic.zero` getter witness for `type`. `type` must conform
 /// to `AdditiveArithmetic`.
 static void emitZeroIntoBuffer(
     SILBuilder &builder, CanType type, SILValue bufferAccess,
     SILLocation loc) {
   auto &ctx = builder.getASTContext();
   auto *module = builder.getModule().getSwiftModule();
   auto &types = builder.getModule().Types;
   // Find the conformance of `type` to `AdditiveArithmetic`.
   auto *proto = ctx.getProtocol(KnownProtocolKind::AdditiveArithmetic);
   auto conformance = module->lookupConformance(type, proto);
   assert(conformance.hasValue() &&
          "Missing conformance to `AdditiveArithmetic`");
   // Find the getter of the `zero` protocol requirement.
   auto zeroLookup = proto->lookupDirect(ctx.Id_zero);
   auto *zeroVar = cast<VarDecl>(zeroLookup.front());
   assert(zeroVar->isProtocolRequirement());
   auto *zeroGetterDecl = zeroVar->getAccessor(AccessorKind::Get);
   SILDeclRef zeroGetterRef(zeroGetterDecl, SILDeclRef::Kind::Func);
   auto zeroGetterType = types.getConstantType(zeroGetterRef);
   // %wm = witness_method ...
   auto *zeroGetter = builder.createWitnessMethod(
       loc, type, *conformance, zeroGetterRef, zeroGetterType);
   // %metatype = metatype $T
   auto thickMetatypeType =
       CanMetatypeType::get(type, MetatypeRepresentation::Thick);
   auto selfMetatype = builder.createMetatype(
       loc, SILType::getPrimitiveObjectType(thickMetatypeType));
   // Apply the getter with the buffer as its first argument and the metatype
   // as its second.
   auto protocolSubs =
       SubstitutionMap::getProtocolSubstitutions(proto, type, *conformance);
   builder.createApply(loc, zeroGetter, protocolSubs,
                       {bufferAccess, selfMetatype},
                       /*isNonThrowing*/ false);
   builder.emitDestroyValueOperation(loc, zeroGetter);
 }

 //===----------------------------------------------------------------------===//
 // Thunk helpers
 //===----------------------------------------------------------------------===//
 // These helpers are copied/adapted from SILGen. They should be refactored and
 // moved to a shared location.
 //===----------------------------------------------------------------------===//

 /// Builds the generic signature for a thunk created in the context of `fn`,
 /// and computes the related generic environment and substitution maps.
 ///
 /// \param fn the function whose lowered type supplies the existing generic
 ///        signature.
 /// \param inheritGenericSig whether the new signature should include `fn`'s
 ///        generic signature (only consulted when there is an opened
 ///        existential).
 /// \param openedExistential the opened existential to replace with a new
 ///        generic parameter, or null if there is none.
 /// \param genericEnv [out] the generic environment of the returned signature.
 /// \param contextSubs [out] substitutions mapping `fn`'s generic signature
 ///        into the thunk's generic environment.
 /// \param interfaceSubs [out] substitutions mapping the returned signature's
 ///        interface types into the caller's context.
 /// \param newArchetype [out] the archetype of the newly added generic
 ///        parameter; only written when `openedExistential` is non-null.
 ///
 /// When `openedExistential` is null, everything is inherited from `fn` and
 /// `newArchetype` is left untouched.
 static CanGenericSignature
 buildThunkSignature(SILFunction *fn,
                     bool inheritGenericSig,
                     OpenedArchetypeType *openedExistential,
                     GenericEnvironment *&genericEnv,
                     SubstitutionMap &contextSubs,
                     SubstitutionMap &interfaceSubs,
                     ArchetypeType *&newArchetype) {
   // If there's no opened existential, we just inherit the generic environment
   // from the parent function.
   if (openedExistential == nullptr) {
     auto genericSig = fn->getLoweredFunctionType()->getGenericSignature();
     genericEnv = fn->getGenericEnvironment();
     interfaceSubs = fn->getForwardingSubstitutionMap();
     contextSubs = interfaceSubs;
     return genericSig;
   }

   auto &ctx = fn->getASTContext();
   GenericSignatureBuilder builder(ctx);

   // Add the existing generic signature. The new generic parameter is placed
   // one depth below the deepest existing parameter (or at depth 0 if no
   // signature is inherited).
   int depth = 0;
   if (inheritGenericSig) {
     if (auto genericSig =
             fn->getLoweredFunctionType()->getGenericSignature()) {
       builder.addGenericSignature(genericSig);
       depth = genericSig->getGenericParams().back()->getDepth() + 1;
     }
   }

   // Add a new generic parameter to replace the opened existential.
   auto *newGenericParam = GenericTypeParamType::get(depth, 0, ctx);

   // Require the new parameter to conform to the opened existential's type.
   builder.addGenericParameter(newGenericParam);
   Requirement newRequirement(RequirementKind::Conformance, newGenericParam,
                              openedExistential->getOpenedExistentialType());
   auto source =
       GenericSignatureBuilder::FloatingRequirementSource::forAbstract();
   builder.addRequirement(newRequirement, source, nullptr);

   auto genericSig = std::move(builder).computeGenericSignature(
       SourceLoc(), /*allowConcreteGenericParams=*/true);
   genericEnv = genericSig->getGenericEnvironment();

   newArchetype = genericEnv->mapTypeIntoContext(newGenericParam)
       ->castTo<ArchetypeType>();

   // Calculate substitutions to map the caller's archetypes to the thunk's
   // archetypes.
   if (auto calleeGenericSig =
           fn->getLoweredFunctionType()->getGenericSignature()) {
     contextSubs = SubstitutionMap::get(
         calleeGenericSig,
         [&](SubstitutableType *type) -> Type {
           return genericEnv->mapTypeIntoContext(type);
         },
         MakeAbstractConformanceForGenericType());
   }

   // Calculate substitutions to map interface types to the caller's archetypes.
   // The new generic parameter maps back to the opened existential itself.
   interfaceSubs = SubstitutionMap::get(
       genericSig,
       [&](SubstitutableType *type) -> Type {
         if (type->isEqual(newGenericParam))
           return openedExistential;
         return fn->mapTypeIntoContext(type);
       },
       MakeAbstractConformanceForGenericType());

   return genericSig->getCanonicalSignature();

 }

 /// The thunk kinds used in the differentiation transform.
 enum class DifferentiationThunkKind {
   /// A reabstraction thunk.
   ///
   /// Reabstraction thunks transform a function-typed value to another one with
   /// different parameter/result abstraction patterns. This is identical to the
   /// thunks generated by SILGen.
   Reabstraction,

   /// An index subset thunk.
   ///
   /// An index subset thunk is used to transform JVP/VJPs into a version that
   /// is "wrt" fewer differentiation parameters.
   /// - Differentials of thunked JVPs use zero for non-requested differentiation
   ///   parameters.
   /// - Pullbacks of thunked VJPs discard results for non-requested
   ///   differentiation parameters.
   IndexSubset
 };

 /// Build the type of a function transformation thunk.
 ///
 /// \param fn the function whose generic context the thunk is built in.
 /// \param sourceType the function type being thunked. Updated in place to the
 ///        type substituted into the thunk's generic context.
 /// \param expectedType the function type the thunk should present. Updated in
 ///        place in the same way as `sourceType`.
 /// \param genericEnv set to the thunk's generic environment when either type
 ///        involves archetypes; otherwise left untouched.
 /// \param interfaceSubs set to the thunk's interface substitutions when either
 ///        type involves archetypes; otherwise left untouched.
 /// \param withoutActuallyEscaping if true, the resulting type is forced to be
 ///        escaping (`@noescape` cleared).
 /// \param thunkKind selects reabstraction vs. index subset behavior: only
 ///        reabstraction thunks receive the source function as a trailing
 ///        parameter.
 /// \returns the interface type of the thunk: the parameters, yields, results
 ///          and error result of `expectedType`, mapped out of context.
 static CanSILFunctionType buildThunkType(SILFunction *fn,
                                          CanSILFunctionType &sourceType,
                                          CanSILFunctionType &expectedType,
                                          GenericEnvironment *&genericEnv,
                                          SubstitutionMap &interfaceSubs,
                                          bool withoutActuallyEscaping,
                                          DifferentiationThunkKind thunkKind) {
   assert(!expectedType->isPolymorphic());
   assert(!sourceType->isPolymorphic());

   auto &module = fn->getModule();
   // Remember the source type before it is rewritten below; the pseudogeneric
   // check must look at the caller-provided type.
   auto origType = sourceType;

   // Cannot build a reabstraction thunk without context. Ownership semantics
   // on the result type are required.
   if (thunkKind == DifferentiationThunkKind::Reabstraction)
     assert(expectedType->getExtInfo().hasContext());

   // This may inherit @noescape from the expected type. The `@noescape`
   // attribute is only stripped when using this type to materialize a new decl.
   // Use `@convention(thin)` if:
   // - Building a reabstraction thunk type.
   // - Building an index subset thunk type, where the expected type has context
   //   (i.e. is `@convention(thick)`).
   auto extInfo = expectedType->getExtInfo();
   if (thunkKind == DifferentiationThunkKind::Reabstraction ||
       extInfo.hasContext()) {
     extInfo = extInfo.withRepresentation(
         SILFunctionType::Representation::Thin);
   }
   if (withoutActuallyEscaping)
     extInfo = extInfo.withNoEscape(false);

   // Does the thunk type involve archetypes other than opened existentials?
   bool hasArchetypes = false;
   // Does the thunk type involve an open existential type?
   CanOpenedArchetypeType openedExistential;
   auto archetypeVisitor = [&](CanType t) {
     // Look at every archetype, not only opened ones: casting straight to
     // `OpenedArchetypeType` here would make the `else` branch unreachable and
     // `hasArchetypes` would never be set, so the thunk signature would fail to
     // inherit the caller's generic signature.
     if (auto archetypeTy = dyn_cast<ArchetypeType>(t)) {
       if (auto opened = dyn_cast<OpenedArchetypeType>(archetypeTy)) {
         assert((openedExistential == CanArchetypeType() ||
                 openedExistential == opened) &&
                "one too many open existentials");
         openedExistential = opened;
       } else {
         hasArchetypes = true;
       }
     }
   };

   // Use the generic signature from the context if the thunk involves
   // generic parameters.
   CanGenericSignature genericSig;
   SubstitutionMap contextSubs;
   ArchetypeType *newArchetype = nullptr;

   if (expectedType->hasArchetype() || sourceType->hasArchetype()) {
     expectedType.visit(archetypeVisitor);
     sourceType.visit(archetypeVisitor);
     genericSig = buildThunkSignature(
         fn, hasArchetypes, openedExistential, genericEnv, contextSubs,
         interfaceSubs, newArchetype);
   }

   // Utility function to apply contextSubs, and also replace the
   // opened existential with the new archetype.
   auto substIntoThunkContext = [&](CanType t) -> CanType {
     return t.subst(
         [&](SubstitutableType *type) -> Type {
           if (CanType(type) == openedExistential)
             return newArchetype;
           return Type(type).subst(contextSubs);
         },
         LookUpConformanceInSubstitutionMap(contextSubs),
         SubstFlags::AllowLoweredTypes)->getCanonicalType();
   };

   sourceType = cast<SILFunctionType>(substIntoThunkContext(sourceType));
   expectedType = cast<SILFunctionType>(substIntoThunkContext(expectedType));

   // If our parent function was pseudogeneric, this thunk must also be
   // pseudogeneric, since we have no way to pass generic parameters.
   if (genericSig)
     if (origType->isPseudogeneric())
       extInfo = extInfo.withIsPseudogeneric();

   // Add the function type as the parameter.
   auto contextConvention =
       SILType::getPrimitiveObjectType(sourceType).isTrivial(*fn)
           ? ParameterConvention::Direct_Unowned
           : ParameterConvention::Direct_Guaranteed;
   SmallVector<SILParameterInfo, 4> params;
   params.append(expectedType->getParameters().begin(),
                 expectedType->getParameters().end());
   // Add reabstraction function parameter only if building a reabstraction thunk
   // type.
   if (thunkKind == DifferentiationThunkKind::Reabstraction)
     params.push_back({sourceType, sourceType->getExtInfo().hasContext()
                                       ? contextConvention
                                       : ParameterConvention::Direct_Unowned});

   // Map the parameter and expected types out of context to get the interface
   // type of the thunk.
   SmallVector<SILParameterInfo, 4> interfaceParams;
   interfaceParams.reserve(params.size());
   for (auto &param : params) {
     auto paramIfaceTy = param.getType()->mapTypeOutOfContext();
     interfaceParams.push_back(SILParameterInfo(
         paramIfaceTy->getCanonicalType(genericSig), param.getConvention()));
   }

   SmallVector<SILYieldInfo, 4> interfaceYields;
   for (auto &yield : expectedType->getYields()) {
     auto yieldIfaceTy = yield.getType()->mapTypeOutOfContext();
     auto interfaceYield =
         yield.getWithType(yieldIfaceTy->getCanonicalType(genericSig));
     interfaceYields.push_back(interfaceYield);
   }

   SmallVector<SILResultInfo, 4> interfaceResults;
   for (auto &result : expectedType->getResults()) {
     auto resultIfaceTy = result.getType()->mapTypeOutOfContext();
     auto interfaceResult =
         result.getWithType(resultIfaceTy->getCanonicalType(genericSig));
     interfaceResults.push_back(interfaceResult);
   }

   Optional<SILResultInfo> interfaceErrorResult;
   if (expectedType->hasErrorResult()) {
     auto errorResult = expectedType->getErrorResult();
     auto errorIfaceTy = errorResult.getType()->mapTypeOutOfContext();
     interfaceErrorResult =
         SILResultInfo(errorIfaceTy->getCanonicalType(genericSig),
                       expectedType->getErrorResult().getConvention());
   }

   // The type of the thunk function.
   return SILFunctionType::get(
       genericSig, extInfo, expectedType->getCoroutineKind(),
       ParameterConvention::Direct_Unowned, interfaceParams, interfaceYields,
       interfaceResults, interfaceErrorResult, module.getASTContext());
 }

 /// Get or create a reabstraction thunk from `fromType` to `toType`, to be
 /// called in `caller`.
 ///
 /// The thunk is looked up (or created) as a shared function under a mangled
 /// reabstraction-thunk name. If it already has a body it is returned as-is;
 /// otherwise a single-block body is emitted that forwards the thunk's
 /// arguments to the function passed as its last argument, converting between
 /// direct and indirect parameters/results where the two conventions differ.
 ///
 /// `fromType` and `toType` are taken by value but are rewritten by
 /// `buildThunkType` (which takes them by reference), so all uses after that
 /// call see the types substituted into the thunk's generic context.
 static SILFunction *getOrCreateReabstractionThunk(SILOptFunctionBuilder &fb,
                                                   SILModule &module,
                                                   SILLocation loc,
                                                   SILFunction *caller,
                                                   CanSILFunctionType fromType,
                                                   CanSILFunctionType toType) {
   SubstitutionMap interfaceSubs;
   GenericEnvironment *genericEnv = nullptr;
   auto thunkType = buildThunkType(
       caller, fromType, toType, genericEnv, interfaceSubs,
       /*withoutActuallyEscaping*/ false,
       DifferentiationThunkKind::Reabstraction);
   // The declared type of the thunk function is always escaping.
   auto thunkDeclType =
       thunkType->getWithExtInfo(thunkType->getExtInfo().withNoEscape(false));

   auto fromInterfaceType = fromType->mapTypeOutOfContext()->getCanonicalType();
   auto toInterfaceType = toType->mapTypeOutOfContext()->getCanonicalType();

   Mangle::ASTMangler mangler;
   std::string name = mangler.mangleReabstractionThunkHelper(
       thunkType, fromInterfaceType, toInterfaceType,
       Type(), module.getSwiftModule());

   auto *thunk = fb.getOrCreateSharedFunction(
       loc, name, thunkDeclType, IsBare, IsTransparent, IsSerialized,
       ProfileCounter(), IsReabstractionThunk, IsNotDynamic);
   // A non-empty thunk has already been generated; reuse it.
   if (!thunk->empty())
     return thunk;

   thunk->setGenericEnvironment(genericEnv);
   thunk->setOwnershipEliminated();
   auto *entry = thunk->createBasicBlock();
   SILBuilder builder(entry);
   createEntryArguments(thunk);

   SILFunctionConventions fromConv(fromType, module);
   SILFunctionConventions toConv(toType, module);
   assert(toConv.useLoweredAddresses());

   // The function value to reabstract is the thunk's last argument.
   auto *fnArg = thunk->getArgumentsWithoutIndirectResults().back();

   // Arguments for the call to `fnArg`, built up in callee order (indirect
   // results first, then parameters). `toArgIter` walks the thunk's own
   // arguments in step.
   SmallVector<SILValue, 4> arguments;
   auto toArgIter = thunk->getArguments().begin();
   auto useNextArgument = [&]() {
     arguments.push_back(*toArgIter++);
   };

   // Stack buffers created for reabstraction; deallocated in reverse order
   // before returning.
   SmallVector<AllocStackInst *, 4> localAllocations;
   auto createAllocStack = [&](SILType type) {
     auto *alloc = builder.createAllocStack(loc, type);
     localAllocations.push_back(alloc);
     return alloc;
   };

   // Handle indirect results.
   assert(fromType->getNumResults() == toType->getNumResults());
   for (unsigned resIdx : range(toType->getNumResults())) {
     auto fromRes = fromConv.getResults()[resIdx];
     auto toRes = toConv.getResults()[resIdx];
     // No abstraction mismatch.
     if (fromRes.isFormalIndirect() == toRes.isFormalIndirect()) {
       // If result types are indirect, directly pass as next argument.
       if (toRes.isFormalIndirect())
         useNextArgument();
       continue;
     }
     // Convert indirect result to direct result: the callee writes into a local
     // buffer that is loaded from after the call.
     if (fromRes.isFormalIndirect()) {
       SILType resultTy = fromConv.getSILType(fromRes);
       assert(resultTy.isAddress());
       auto *indRes = createAllocStack(resultTy);
       arguments.push_back(indRes);
       continue;
     }
     // Convert direct result to indirect result.
     // Increment thunk argument iterator; reabstraction handled later.
     toArgIter++;
   }

   // Reabstract parameters.
   assert(toType->getNumParameters() == fromType->getNumParameters());
   for (unsigned paramIdx : range(toType->getNumParameters())) {
     auto fromParam = fromConv.getParameters()[paramIdx];
     auto toParam = toConv.getParameters()[paramIdx];
     // No abstraction mismatch. Directly use next argument.
     if (fromParam.isFormalIndirect() == toParam.isFormalIndirect()) {
       useNextArgument();
       continue;
     }
     // The callee takes this parameter indirectly but the thunk receives it
     // directly: spill the thunk argument into a stack buffer and pass the
     // buffer instead.
     if (fromParam.isFormalIndirect()) {
       // `paramTy` only feeds the assertion below; the buffer is typed after
       // the thunk argument itself.
       auto paramTy = fromConv.getSILType(fromType->getParameters()[paramIdx]);
       if (!paramTy.hasArchetype())
         paramTy = thunk->mapTypeIntoContext(paramTy);
       assert(paramTy.isAddress());
       auto *toArg = *toArgIter++;
       auto *buf = createAllocStack(toArg->getType());
       builder.createStore(loc, toArg, buf,
                           StoreOwnershipQualifier::Unqualified);
       arguments.push_back(buf);
       continue;
     }
     // The callee takes this parameter directly but the thunk receives it
     // indirectly: load the value from the thunk argument.
     assert(toParam.isFormalIndirect());
     auto *toArg = *toArgIter++;
     auto *load = builder.createLoad(loc, toArg,
                                     LoadOwnershipQualifier::Unqualified);
     arguments.push_back(load);
   }

   auto *apply = builder.createApply(
       loc, fnArg, SubstitutionMap(), arguments, /*isNonThrowing*/ false);

   // Get return elements.
   SmallVector<SILValue, 4> results;
   // Extract all direct results.
   SmallVector<SILValue, 4> directResults;
   extractAllElements(apply, builder, directResults);

   // Three cursors advance independently: the callee's direct results, the
   // callee's indirect results, and the thunk's own indirect results.
   auto fromDirResultsIter = directResults.begin();
   auto fromIndResultsIter = apply->getIndirectSILResults().begin();
   auto toIndResultsIter = thunk->getIndirectResults().begin();
   // Reabstract results.
   for (unsigned resIdx : range(toType->getNumResults())) {
     auto fromRes = fromConv.getResults()[resIdx];
     auto toRes = toConv.getResults()[resIdx];
     // No abstraction mismatch.
     if (fromRes.isFormalIndirect() == toRes.isFormalIndirect()) {
       // If result types are direct, add call result as direct thunk result.
       if (toRes.isFormalDirect())
         results.push_back(*fromDirResultsIter++);
       // If result types are indirect, increment indirect result iterators.
       else {
         ++fromIndResultsIter;
         ++toIndResultsIter;
       }
       continue;
     }
     // Load direct results from indirect results.
     if (fromRes.isFormalIndirect()) {
       auto indRes = *fromIndResultsIter++;
       auto *load = builder.createLoad(loc, indRes,
                                       LoadOwnershipQualifier::Unqualified);
       results.push_back(load);
       continue;
     }
     // Store direct results to indirect results.
     assert(toRes.isFormalIndirect());
     SILType resultTy = toConv.getSILType(toRes);
     assert(resultTy.isAddress());
     auto indRes = *toIndResultsIter++;
     builder.createStore(loc, *fromDirResultsIter++, indRes,
                         StoreOwnershipQualifier::Unqualified);
   }
   auto retVal = joinElements(results, builder, loc);

   // Deallocate local allocations.
   for (auto *alloc : reversed(localAllocations))
     builder.createDeallocStack(loc, alloc);

   // Create return.
   builder.createReturn(loc, retVal);

   LLVM_DEBUG(auto &s = getADDebugStream() << "Created reabstraction thunk.\n";
              s << "  From type: " << fromType << '\n';
              s << "  To type: " << toType << '\n';
              s << '\n' << *thunk);

   return thunk;
 }

 namespace {
 class VJPEmitter final
     : public TypeSubstCloner<VJPEmitter, SILOptFunctionBuilder> {
   friend class PullbackEmitter;

 private:
   /// The global context.
   ADContext &context;

   /// The original function.
   SILFunction *const original;

   /// The `[differentiable]` attribute.
   SILDifferentiableAttr *const attr;

   /// The VJP function.
   SILFunction *const vjp;

   /// The pullback function.
   SILFunction *pullback;

   /// The differentiation invoker.
   DifferentiationInvoker invoker;

   /// Info from activity analysis on the original function.
   const DifferentiableActivityInfo &activityInfo;

   /// The linear map info.
   LinearMapInfo pullbackInfo;

   /// Caches basic blocks whose phi arguments have been remapped (adding a
   /// predecessor enum argument).
   SmallPtrSet<SILBasicBlock *, 4> remappedBasicBlocks;

   /// A pair of a trampoline block phi argument and its corresponding
   /// destination block phi argument.
   struct TrampolinedArgumentPair {
     SILPhiArgument *trampolineArgument;
     SILPhiArgument *destinationArgument;
   };
   /// An array that keeps track of all `@guaranteed` phi arguments in any
   /// trampoline blocks we've added. Each of these arguments needs to have a
   /// lifetime-ending use past its destination argument's lifetime-ending use,
   /// so we keep track of these pairs of arguments and emit `end_borrow`s when
   /// function cloning is finished.
   SmallVector<TrampolinedArgumentPair, 8> trampolinedGuaranteedPhiArguments;

   /// Set once a non-differentiable instruction has been diagnosed (see
   /// `visitSILInstruction`). While set, `visit`, `postProcess` and
   /// `remapBasicBlock` skip their usual work.
   bool errorOccurred = false;

   /// Mapping from original blocks to pullback values. Used to build pullback
   /// struct instances.
   DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> pullbackValues;

   /// Convenience accessors. The AST context and SIL module are those of the
   /// VJP function; the differentiation indices come from the
   /// `[differentiable]` attribute.
   ASTContext &getASTContext() const { return vjp->getASTContext(); }
   SILModule &getModule() const { return vjp->getModule(); }
   const SILAutoDiffIndices &getIndices() const { return attr->getIndices(); }

   /// Compute the substitution map used to clone `original` into `vjp`.
   ///
   /// When the VJP has its own generic environment, the map is rebuilt from
   /// that environment's forwarding substitutions; otherwise the original
   /// function's forwarding substitutions are used.
   static SubstitutionMap getSubstitutionMap(SILFunction *original,
                                             SILFunction *vjp) {
     auto *vjpEnv = vjp->getGenericEnvironment();
     if (!vjpEnv)
       return original->getForwardingSubstitutionMap();

     auto vjpForwardingSubs = vjpEnv->getForwardingSubstitutionMap();
     return SubstitutionMap::get(
         vjpEnv->getGenericSignature(),
         QuerySubstitutionMap{vjpForwardingSubs},
         LookUpConformanceInSubstitutionMap(vjpForwardingSubs));
   }

   /// Fetch the activity analysis result for `original`, specialized to the
   /// VJP's lowered generic signature. `indices` is only used for debug output.
   static const DifferentiableActivityInfo &getActivityInfo(
       ADContext &context, SILFunction *original,
       const SILAutoDiffIndices &indices, SILFunction *vjp) {
     auto *analysis = context.getPassManager()
                          .getAnalysis<DifferentiableActivityAnalysis>();
     auto &collection = *analysis->get(original);
     auto vjpGenericSig = vjp->getLoweredFunctionType()->getGenericSignature();
     auto &info = collection.getActivityInfo(
         vjpGenericSig, AutoDiffDerivativeFunctionKind::VJP);
     LLVM_DEBUG(
         dumpActivityInfo(*original, indices, info, getADDebugStream()));
     return info;
   }

 public:
   /// Set up a cloner from `original` into `vjp`.
   ///
   /// Besides initializing the cloner base and the members, construction also
   /// creates the (still empty) pullback function and registers it with the
   /// context's list of generated functions.
   explicit VJPEmitter(ADContext &context, SILFunction *original,
                       SILDifferentiableAttr *attr, SILFunction *vjp,
                       DifferentiationInvoker invoker)
       : TypeSubstCloner(*vjp, *original, getSubstitutionMap(original, vjp)),
         context(context), original(original), attr(attr), vjp(vjp),
         invoker(invoker), activityInfo(getActivityInfo(
                               context, original, attr->getIndices(), vjp)),
         pullbackInfo(context, AutoDiffLinearMapKind::Pullback, original, vjp,
                      attr->getIndices(), activityInfo) {
     // Create empty pullback function. This reads `pullbackInfo`, so it has to
     // happen after the member initializers above have run.
     pullback = createEmptyPullback();
     context.getGeneratedFunctions().push_back(pullback);
   }

   /// Create the pullback function for `original`, without a body.
   ///
   /// The pullback's type is assembled as follows:
   /// - first parameter: the tangent of the original result selected by
   ///   `indices.source` (the seed);
   /// - second parameter: the pullback struct of the original return block,
   ///   passed owned;
   /// - results: one tangent per requested "wrt" parameter index.
   /// Ext info, coroutine kind and callee convention are copied from the
   /// original function type.
   SILFunction *createEmptyPullback() {
     auto &module = context.getModule();
     auto origTy = original->getLoweredFunctionType();
     auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());

     // RAII that pushes the original function's generic signature to
     // `module.Types` so that the calls to `module.Types.getTypeLowering()`
     // below will know the original function's generic parameter types.
     Lowering::GenericContextScope genericContextScope(
         module.Types, origTy->getGenericSignature());

     // Given a tangent type and the convention of the original result it
     // corresponds to, returns the formal SIL parameter info for the pullback.
     auto getTangentParameterInfoForOriginalResult = [&](
         CanType tanType, ResultConvention origResConv) -> SILParameterInfo {
       auto &tl = context.getTypeConverter().getTypeLowering(
           tanType, ResilienceExpansion::Minimal);
       ParameterConvention conv;
       switch (origResConv) {
       case ResultConvention::Owned:
       case ResultConvention::Autoreleased:
         conv = tl.isTrivial()
             ? ParameterConvention::Direct_Unowned
             : ParameterConvention::Direct_Guaranteed;
         break;
       case ResultConvention::Unowned:
       case ResultConvention::UnownedInnerPointer:
         conv = ParameterConvention::Direct_Unowned;
         break;
       case ResultConvention::Indirect:
         conv = ParameterConvention::Indirect_In_Guaranteed;
         break;
       }
       return {tanType, conv};
     };

     // Given a tangent type and the convention of the original parameter it
     // corresponds to, returns the formal SIL result info for the pullback.
     auto getTangentResultInfoForOriginalParameter = [&](
         CanType tanType, ParameterConvention origParamConv) -> SILResultInfo {
       auto &tl = context.getTypeConverter().getTypeLowering(
           tanType, ResilienceExpansion::Minimal);
       ResultConvention conv;
       switch (origParamConv) {
       case ParameterConvention::Direct_Owned:
       case ParameterConvention::Direct_Guaranteed:
       case ParameterConvention::Direct_Unowned:
         conv = tl.isTrivial()
             ? ResultConvention::Unowned
             : ResultConvention::Owned;
         break;
       case ParameterConvention::Indirect_In:
       case ParameterConvention::Indirect_Inout:
       case ParameterConvention::Indirect_In_Constant:
       case ParameterConvention::Indirect_In_Guaranteed:
       case ParameterConvention::Indirect_InoutAliasable:
         conv = ResultConvention::Indirect;
         break;
       }
       return {tanType, conv};
     };

     // Parameters of the pullback are:
     // - the tangent vectors of the original results, and
     // - a pullback struct.
     // Results of the pullback are in the tangent space of the original
     // parameters.
     SmallVector<SILParameterInfo, 8> pbParams;
     SmallVector<SILResultInfo, 8> adjResults;
     auto origParams = origTy->getParameters();
     auto indices = attr->getIndices();

     // Add pullback parameter for the seed.
     auto origResInfo = origTy->getResults()[indices.source];
     pbParams.push_back(getTangentParameterInfoForOriginalResult(
         origResInfo.getType()
             ->getAutoDiffAssociatedTangentSpace(lookupConformance)
             ->getCanonicalType(), origResInfo.getConvention()));

     // Accept a pullback struct in the pullback parameter list. This is the
     // returned pullback's closure context.
     auto *origExit = &*original->findReturnBB();
     auto *pbStruct = pullbackInfo.getLinearMapStruct(origExit);
     auto pbStructType = pbStruct->getDeclaredInterfaceType()
         ->getCanonicalType();
     pbParams.push_back({pbStructType, ParameterConvention::Direct_Owned});

     // Add pullback results for the requested wrt parameters.
     for (auto i : indices.parameters->getIndices()) {
       auto origParam = origParams[i];
       adjResults.push_back(getTangentResultInfoForOriginalParameter(
           origParam.getType()
               ->getAutoDiffAssociatedTangentSpace(lookupConformance)
               ->getCanonicalType(), origParam.getConvention()));
     }

     // The pullback name is derived from the original function's name, the
     // linear map kind and the differentiation indices.
     Mangle::ASTMangler mangler;
     auto pbName = original->getASTContext().getIdentifier(
         mangler.mangleAutoDiffLinearMapHelper(
             original->getName(), AutoDiffLinearMapKind::Pullback,
             indices)).str();
     auto pbGenericSig = getDerivativeGenericSignature(attr, original);
     auto *pbGenericEnv =
         pbGenericSig ? pbGenericSig->getGenericEnvironment() : nullptr;
     auto pbType = SILFunctionType::get(
         pbGenericSig, origTy->getExtInfo(), origTy->getCoroutineKind(),
         origTy->getCalleeConvention(), pbParams, {}, adjResults, None,
         original->getASTContext());

     SILOptFunctionBuilder fb(context.getTransform());
     // The generated pullback linkage is set to Hidden because generated
     // pullbacks are never called cross-module.
     auto linkage = SILLinkage::Hidden;
     auto *pullback = fb.createFunction(
         linkage, pbName, pbType, pbGenericEnv, original->getLocation(),
         original->isBare(), IsNotTransparent, original->isSerialized(),
         original->isDynamicallyReplaceable());
     pullback->setDebugScope(new (module)
                                 SILDebugScope(original->getLocation(),
                                               pullback));
     return pullback;
   }

   /// Run VJP generation. Returns true on error.
   bool run();

   /// Cloner post-processing hook. Forwards to the base implementation unless
   /// an error has already been recorded.
   void postProcess(SILInstruction *orig, SILInstruction *cloned) {
     if (!errorOccurred)
       SILClonerWithScopes::postProcess(orig, cloned);
   }

   /// Remap an original basic block to its VJP block. The first time a block is
   /// remapped, an owned phi argument of the block's branching trace
   /// (predecessor) enum type is appended to the VJP block.
   SILBasicBlock *remapBasicBlock(SILBasicBlock *bb) {
     auto *vjpBB = BBMap[bb];
     // After an error, or when the block has been handled before, just hand
     // back the mapped block unchanged.
     if (errorOccurred || remappedBasicBlocks.count(bb))
       return vjpBB;
     // Append the predecessor enum argument and remember that we did so.
     auto traceEnumTy =
         getNominalDeclLoweredType(pullbackInfo.getBranchingTraceDecl(bb));
     vjpBB->createPhiArgument(traceEnumTy, ValueOwnershipKind::Owned);
     remappedBasicBlocks.insert(bb);
     return vjpBB;
   }

   /// Entry point for visiting any instruction. Once an earlier visit has
   /// reported an error, all further instructions are skipped.
   void visit(SILInstruction *inst) {
     if (!errorOccurred)
       TypeSubstCloner::visit(inst);
   }

   /// Handler taking a plain `SILInstruction`: emits a "not differentiable"
   /// diagnostic for `inst` and records that an error occurred.
   void visitSILInstruction(SILInstruction *inst) {
     context.emitNondifferentiabilityError(inst, invoker,
         diag::autodiff_expression_not_differentiable_note);
     errorOccurred = true;
   }

 private:
   /// Get the lowered SIL type of the given nominal type declaration: its
   /// declared interface type is remapped via `getOpASTType` and then lowered
   /// with minimal resilience expansion.
   SILType getNominalDeclLoweredType(NominalTypeDecl *nominal) {
     auto declaredTy = nominal->getDeclaredInterfaceType()->getCanonicalType();
     auto remappedTy = getOpASTType(declaredTy);
     return context.getTypeConverter().getLoweredType(
         remappedTy, ResilienceExpansion::Minimal);
   }

   /// Build a pullback struct value for the original block that ends in the
   /// given terminator.
   ///
   /// The struct's operands are the pullback values recorded for that block.
   /// For every block other than the entry block, the VJP block's last argument
   /// (the predecessor enum) is placed in front of them.
   StructInst *buildPullbackValueStructValue(TermInst *termInst) {
     assert(termInst->getFunction() == original);
     auto *origBB = termInst->getParent();
     auto *vjpBB = BBMap[origBB];
     auto structTy =
         getNominalDeclLoweredType(pullbackInfo.getLinearMapStruct(origBB));
     // Work on a copy so the recorded pullback values stay untouched.
     auto structFields = pullbackValues[origBB];
     if (!origBB->isEntry())
       structFields.insert(structFields.begin(), vjpBB->getArguments().back());
     return getBuilder().createStruct(termInst->getFunction()->getLocation(),
                                      structTy, structFields);
   }

   /// Build a predecessor enum instance using the given builder for the given
   /// original predecessor/successor blocks and pullback struct value.
   ///
   /// The enum is the branching trace enum of `succBB`; the case is the one
   /// registered for the `predBB` -> `succBB` edge. If that case's payload is a
   /// box type (indirect case), `pbStructVal` is first stored into a freshly
   /// allocated box and the box becomes the payload; otherwise `pbStructVal` is
   /// used as the payload directly.
   EnumInst *buildPredecessorEnumValue(SILBuilder &builder,
                                       SILBasicBlock *predBB,
                                       SILBasicBlock *succBB,
                                       SILValue pbStructVal) {
     auto loc = pbStructVal.getLoc();
     auto *succEnum = pullbackInfo.getBranchingTraceDecl(succBB);
     auto enumLoweredTy = getNominalDeclLoweredType(succEnum);
     auto *enumEltDecl =
         pullbackInfo.lookUpBranchingTraceEnumElement(predBB, succBB);
     auto enumEltType = getOpType(
         enumLoweredTy.getEnumElementType(enumEltDecl, getModule()));
     // If the enum element type does not have a box type (i.e. the enum case is
     // not indirect), then directly create an enum.
     auto boxType = dyn_cast<SILBoxType>(enumEltType.getASTType());
     if (!boxType)
       return builder.createEnum(loc, pbStructVal, enumEltDecl, enumLoweredTy);
     // Otherwise, box the pullback struct value and create an enum.
     auto *newBox = builder.createAllocBox(loc, boxType);
     builder.emitScopedBorrowOperation(
         loc, newBox, [&](SILValue borrowedBox) {
       // Project from the borrowed box so that the projection and the store
       // are actually covered by the borrow scope opened for them.
       auto *projectBox =
           builder.createProjectBox(loc, borrowedBox, /*index*/ 0);
       builder.emitStoreValueOperation(loc, pbStructVal, projectBox,
                                       StoreOwnershipQualifier::Init);
     });
     return builder.createEnum(loc, newBox, enumEltDecl, enumLoweredTy);
   }

 public:
   /// Clone a `return`: the VJP returns the original result's elements followed
   /// by the pullback, which is the pullback function partially applied to this
   /// block's pullback struct value.
   void visitReturnInst(ReturnInst *ri) {
     auto loc = ri->getOperand().getLoc();
     auto &builder = getBuilder();
     auto *pbStructVal = buildPullbackValueStructValue(ri);

     // Split the VJP value corresponding to the original result into its
     // elements. These form the leading part of the VJP's result.
     auto *origRetInst = cast<ReturnInst>(ri->getParent()->getTerminator());
     SmallVector<SILValue, 8> vjpResults;
     extractAllElements(getOpValue(origRetInst->getOperand()), builder,
                        vjpResults);

     // Pick the substitutions for the pullback reference: the VJP generic
     // environment's forwarding map if there is one, else the VJP's own.
     SubstitutionMap pullbackSubs;
     if (auto *vjpGenericEnv = vjp->getGenericEnvironment())
       pullbackSubs = vjpGenericEnv->getForwardingSubstitutionMap();
     else
       pullbackSubs = vjp->getForwardingSubstitutionMap();

     // Close over the pullback struct value.
     auto *pullbackRef = builder.createFunctionRef(loc, pullback);
     auto *partialApply = builder.createPartialApply(
         loc, pullbackRef, pullbackSubs, {pbStructVal},
         ParameterConvention::Direct_Guaranteed);

     // Return the original result elements together with the pullback.
     vjpResults.push_back(partialApply);
     builder.createReturn(ri->getLoc(),
                          joinElements(vjpResults, builder, loc));
   }

   /// Clone a `br`: the remapped original arguments are forwarded, followed by
   /// a predecessor enum value that wraps this block's pullback struct value.
   void visitBranchInst(BranchInst *bi) {
     auto &builder = getBuilder();
     auto *destBB = bi->getDestBB();

     // Pullback struct for the current block, wrapped in the destination
     // block's predecessor enum.
     auto *pbStructVal = buildPullbackValueStructValue(bi);
     auto *predEnumVal = buildPredecessorEnumValue(
         builder, bi->getParent(), destBB, pbStructVal);

     // Remapped original arguments first, the enum value last.
     SmallVector<SILValue, 8> vjpArgs;
     for (auto origArg : bi->getArgs())
       vjpArgs.push_back(getOpValue(origArg));
     vjpArgs.push_back(predEnumVal);

     builder.createBranch(bi->getLoc(), getOpBasicBlock(destBB), vjpArgs);
   }

   /// Clone a `cond_br`: each successor is reached through a new trampoline
   /// block that builds the predecessor enum value for that edge and branches
   /// on to the VJP successor block.
   void visitCondBranchInst(CondBranchInst *cbi) {
     // Build pullback struct value for original block.
     // Build predecessor enum values for true/false blocks.
     auto *origBB = cbi->getParent();
     auto *pbStructVal = buildPullbackValueStructValue(cbi);

     // Creates a trampoline block for given original successor block. The
     // trampoline block has the same arguments as the VJP successor block but
     // drops the last predecessor enum argument. The generated `cond_br`
     // instruction branches to the trampoline block, and the trampoline block
     // constructs a predecessor enum value and branches to the VJP successor
     // block.
     auto createTrampolineBasicBlock =
         [&](SILBasicBlock *origSuccBB) -> SILBasicBlock * {
       auto *vjpSuccBB = getOpBasicBlock(origSuccBB);
       // Create the trampoline block.
       auto *trampolineBB = vjp->createBasicBlockBefore(vjpSuccBB);
       for (auto *arg : vjpSuccBB->getArguments().drop_back())
         trampolineBB->createPhiArgument(arg->getType(),
                                         arg->getOwnershipKind());
       // Build predecessor enum value for successor block and branch to it.
       SILBuilder trampolineBuilder(trampolineBB);
       auto *succEnumVal = buildPredecessorEnumValue(
           trampolineBuilder, origBB, origSuccBB, pbStructVal);
       SmallVector<SILValue, 4> forwardedArguments(
           trampolineBB->getArguments().begin(),
           trampolineBB->getArguments().end());
       forwardedArguments.push_back(succEnumVal);
       trampolineBuilder.createBranch(cbi->getLoc(), vjpSuccBB,
                                      forwardedArguments);
       return trampolineBB;
     };

     // Evaluate the operands in a fixed order. Passing the trampoline calls
     // directly as function arguments would leave their relative order
     // unspecified, so the emitted SIL could differ between host compilers.
     auto condition = getOpValue(cbi->getCondition());
     auto *trueTrampolineBB = createTrampolineBasicBlock(cbi->getTrueBB());
     auto *falseTrampolineBB = createTrampolineBasicBlock(cbi->getFalseBB());

     // Create a new `cond_br` instruction.
     getBuilder().createCondBranch(cbi->getLoc(), condition, trueTrampolineBB,
                                   falseTrampolineBB);
   }

   /// Clones a `switch_enum` into the VJP. Every case (and the default
   /// destination, if any) is routed through a trampoline block that builds
   /// the predecessor enum value for that edge.
   void visitSwitchEnumInst(SwitchEnumInst *sei) {
     // The pullback struct value of the original block is shared by all edges.
     auto *pbStructVal = buildPullbackValueStructValue(sei);
     auto *origBB = sei->getParent();

     // Builds the trampoline block for one original successor block. The
     // trampoline has the same arguments as the VJP successor block minus the
     // trailing predecessor enum argument. The generated `switch_enum` targets
     // the trampoline, which constructs the predecessor enum value and then
     // branches to the VJP successor block.
     auto makeTrampoline = [&](SILBasicBlock *origSuccBB) -> SILBasicBlock * {
       auto *vjpSuccBB = getOpBasicBlock(origSuccBB);
       auto *trampolineBB = vjp->createBasicBlockBefore(vjpSuccBB);
       SmallVector<SILValue, 4> branchArgs;
       for (auto *succArg : vjpSuccBB->getArguments().drop_back()) {
         auto *newArg = trampolineBB->createPhiArgument(
             succArg->getType(), succArg->getOwnershipKind());
         branchArgs.push_back(newArg);
         // A `@guaranteed` trampoline argument needs a lifetime-ending use
         // placed after the lifetime-ending uses of the argument it forwards
         // to. Remember the pair in `trampolinedGuaranteedPhiArguments` so the
         // `end_borrow`s can be emitted once function cloning is finished.
         if (newArg->getOwnershipKind() != ValueOwnershipKind::Guaranteed)
           continue;
         trampolinedGuaranteedPhiArguments.push_back(
             {newArg, cast<SILPhiArgument>(succArg)});
       }
       // Append the predecessor enum value and jump to the VJP successor.
       SILBuilder trampolineBuilder(trampolineBB);
       branchArgs.push_back(buildPredecessorEnumValue(
           trampolineBuilder, origBB, origSuccBB, pbStructVal));
       trampolineBuilder.createBranch(sei->getLoc(), vjpSuccBB, branchArgs);
       return trampolineBB;
     };

     // One trampoline per enum case, in the original case order.
     SmallVector<std::pair<EnumElementDecl *, SILBasicBlock *>, 4> caseBBs;
     for (unsigned caseIdx = 0, numCases = sei->getNumCases();
          caseIdx != numCases; ++caseIdx) {
       auto origCase = sei->getCase(caseIdx);
       auto *caseTrampolineBB = makeTrampoline(origCase.second);
       caseBBs.push_back({origCase.first, caseTrampolineBB});
     }
     // The default destination, when present, gets its own trampoline too.
     auto *origDefaultBB = sei->getDefaultBBOrNull().getPtrOrNull();
     SILBasicBlock *defaultTrampolineBB =
         origDefaultBB ? makeTrampoline(origDefaultBB) : nullptr;

     // Emit the new `switch_enum` over the remapped operand.
     getBuilder().createSwitchEnum(
         sei->getLoc(), getOpValue(sei->getOperand()), defaultTrampolineBB,
         caseBBs);
   }

   /// If an `apply` has active results or active inout parameters, replace it
   /// with an `apply` of its VJP.
   ///
   /// The VJP call returns the original results followed by a pullback. The
   /// original results are mapped as the value of `ai`, and the pullback
   /// (reabstracted through a thunk when its type differs from the lowered
   /// type of the corresponding linear map field) is recorded in
   /// `pullbackValues` for the parent block of `ai`.
   ///
   /// On any diagnosed error, `errorOccurred` is set and the method returns
   /// without mapping a value for `ai`.
   void visitApplyInst(ApplyInst *ai) {
     // If the function should not be differentiated or it's the array literal
     // initialization intrinsic, just do standard cloning.
     if (!pullbackInfo.shouldDifferentiateApplyInst(ai) ||
         isArrayLiteralIntrinsic(ai)) {
       LLVM_DEBUG(getADDebugStream() << "No active results:\n" << *ai << '\n');
       TypeSubstCloner::visitApplyInst(ai);
       return;
     }

     // Check and reject functions with active inout arguments. It's not yet
     // supported.
     auto paramInfos = ai->getSubstCalleeConv().getParameters();
     auto paramArgs = ai->getArgumentsWithoutIndirectResults();
     for (unsigned i : swift::indices(paramInfos)) {
       if (paramInfos[i].isIndirectInOut() &&
           activityInfo.isActive(paramArgs[i], getIndices())) {
         context.emitNondifferentiabilityError(ai, invoker,
             diag::autodiff_cannot_differentiate_through_inout_arguments);
         errorOccurred = true;
         return;
       }
     }

     LLVM_DEBUG(getADDebugStream() << "VJP-transforming:\n" << *ai << '\n');

     // Get the minimal parameter and result indices required for differentiating
     // this `apply`.
     SmallVector<SILValue, 4> allResults;
     SmallVector<unsigned, 8> activeParamIndices;
     SmallVector<unsigned, 8> activeResultIndices;
     collectMinimalIndicesForFunctionCall(ai, getIndices(), activityInfo,
                                          allResults, activeParamIndices,
                                          activeResultIndices);
     assert(!activeParamIndices.empty() && "Parameter indices cannot be empty");
     assert(!activeResultIndices.empty() && "Result indices cannot be empty");
     LLVM_DEBUG(auto &s = getADDebugStream() << "Active indices: params={";
                interleave(activeParamIndices.begin(), activeParamIndices.end(),
                           [&s](unsigned i) { s << i; }, [&s] { s << ", "; });
                s << "}, results={"; interleave(
                    activeResultIndices.begin(), activeResultIndices.end(),
                    [&s](unsigned i) { s << i; }, [&s] { s << ", "; });
                s << "}\n";);
     // FIXME: We don't support multiple active results yet.
     if (activeResultIndices.size() > 1) {
       context.emitNondifferentiabilityError(
           ai, invoker, diag::autodiff_expression_not_differentiable_note);
       errorOccurred = true;
       return;
     }

     // Form expected indices, assuming there's only one result.
     SILAutoDiffIndices indices(
         activeResultIndices.front(),
         IndexSubset::get(
             getASTContext(), ai->getArgumentsWithoutIndirectResults().size(),
             activeParamIndices));

     // Emit the VJP.
     auto loc = ai->getLoc();
     auto &builder = getBuilder();
     auto original = getOpValue(ai->getCallee());
     SILValue vjpValue;
     // If the remapped callee is a `@differentiable` function, just extract
     // its VJP.
     auto originalFnTy = original->getType().castTo<SILFunctionType>();
     if (originalFnTy->isDifferentiable()) {
       // Every parameter we differentiate with respect to must be one of the
       // callee's own differentiation parameters.
       auto paramIndices = originalFnTy->getDifferentiationParameterIndices();
       for (auto i : indices.parameters->getIndices()) {
         if (!paramIndices->contains(i)) {
           context.emitNondifferentiabilityError(original, invoker,
               diag::autodiff_function_nondiff_parameter_not_differentiable);
           errorOccurred = true;
           return;
         }
       }
       // NOTE(review): unlike the `differentiable_function` path below, no
       // `end_borrow` is emitted for `borrowedDiffFunc` here - confirm that
       // this is intended.
       auto borrowedDiffFunc = builder.emitBeginBorrowOperation(loc, original);
       vjpValue = builder.createDifferentiableFunctionExtract(
           loc, NormalDifferentiableFunctionTypeComponent::VJP,
           borrowedDiffFunc);
       vjpValue = builder.emitCopyValueOperation(loc, vjpValue);
     }

     // Check and diagnose non-differentiable original function type. Returns
     // true (after emitting a diagnostic and setting `errorOccurred`) if
     // `origFnTy` has a non-differentiable wrt parameter or source result.
     auto diagnoseNondifferentiableOriginalFunctionType =
         [&](CanSILFunctionType origFnTy) {
           // Check and diagnose non-differentiable arguments.
           for (unsigned paramIndex : range(origFnTy->getNumParameters())) {
             if (indices.isWrtParameter(paramIndex) &&
                     !origFnTy->getParameters()[paramIndex]
                     .getSILStorageType()
                     .isDifferentiable(getModule())) {
               context.emitNondifferentiabilityError(
                   ai->getArgumentsWithoutIndirectResults()[paramIndex], invoker,
                   diag::autodiff_nondifferentiable_argument);
               errorOccurred = true;
               return true;
             }
           }
           // Check and diagnose non-differentiable results.
           if (!origFnTy->getResults()[indices.source]
                   .getSILStorageType()
                   .isDifferentiable(getModule())) {
             context.emitNondifferentiabilityError(
                 original, invoker, diag::autodiff_nondifferentiable_result);
             errorOccurred = true;
             return true;
           }
           return false;
         };
     if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
       return;

     // If VJP has not yet been found, emit a `differentiable_function`
     // instruction on the remapped original function operand and
     // a `differentiable_function_extract` instruction to get the VJP.
     // The `differentiable_function` instruction will be canonicalized during
     // the transform main loop.
     if (!vjpValue) {
       // FIXME: Handle indirect differentiation invokers. This may require some
       // redesign: currently, each original function + attribute pair is mapped
       // only to one invoker.
       /*
       DifferentiationInvoker indirect(ai, attr);
       auto insertion =
           context.getInvokers().try_emplace({this->original, attr}, indirect);
       auto &invoker = insertion.first->getSecond();
       invoker = indirect;
       */

       // If the original `apply` instruction has a substitution map, then the
       // applied function is specialized.
       // In the VJP, specialization is also necessary for parity. The original
       // function operand is specialized with a remapped version of same
       // substitution map using an argument-less `partial_apply`.
       if (ai->getSubstitutionMap().empty()) {
         original = builder.emitCopyValueOperation(loc, original);
       } else {
         auto substMap = getOpSubstitutionMap(ai->getSubstitutionMap());
         auto vjpPartialApply = getBuilder().createPartialApply(
             ai->getLoc(), original, substMap, {},
             ParameterConvention::Direct_Guaranteed);
         original = vjpPartialApply;
         originalFnTy = original->getType().castTo<SILFunctionType>();
         // Diagnose if new original function type is non-differentiable.
         if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
           return;
       }

       auto *diffFuncInst = context.createDifferentiableFunction(
           getBuilder(), loc, indices.parameters, original);

       // Record the `differentiable_function` instruction.
       context.getDifferentiableFunctionInsts().push_back(diffFuncInst);
       // TODO(TF-689): Make `differentiable_function` store result indices and
       // remove `ADContext::resultIndices`.
       context.getResultIndices()[diffFuncInst] = activeResultIndices.front();

       // Extract an owned copy of the VJP, then release the borrow and the
       // `differentiable_function` value itself.
       auto borrowedADFunc =
           builder.emitBeginBorrowOperation(loc, diffFuncInst);
       auto extractedVJP = getBuilder().createDifferentiableFunctionExtract(
           loc, NormalDifferentiableFunctionTypeComponent::VJP,
           borrowedADFunc);
       vjpValue = builder.emitCopyValueOperation(loc, extractedVJP);
       builder.emitEndBorrowOperation(loc, borrowedADFunc);
       builder.emitDestroyValueOperation(loc, diffFuncInst);
     }

     // Record desired/actual VJP indices.
     // Temporarily set original pullback type to `None`.
     NestedApplyInfo info{indices, /*originalPullbackType*/ None};
     auto insertion = context.getNestedApplyInfo().try_emplace(ai, info);
     auto &nestedApplyInfo = insertion.first->getSecond();
     // Overwrite any entry that already existed for this `apply`.
     nestedApplyInfo = info;

     // Call the VJP using the original parameters.
     SmallVector<SILValue, 8> vjpArgs;
     auto vjpFnTy = getOpType(vjpValue->getType()).castTo<SILFunctionType>();
     auto numVJPArgs =
         vjpFnTy->getNumParameters() + vjpFnTy->getNumIndirectFormalResults();
     vjpArgs.reserve(numVJPArgs);
     // Collect substituted arguments.
     for (auto origArg : ai->getArguments())
       vjpArgs.push_back(getOpValue(origArg));
     assert(vjpArgs.size() == numVJPArgs);
     // Apply the VJP.
     // The VJP should be specialized, so no substitution map is necessary.
     auto *vjpCall = getBuilder().createApply(loc, vjpValue, SubstitutionMap(),
                                              vjpArgs, ai->isNonThrowing());
     LLVM_DEBUG(getADDebugStream() << "Applied vjp function\n" << *vjpCall);
     builder.emitDestroyValueOperation(loc, vjpValue);

     // Get the VJP results (original results and pullback). The pullback is
     // the last direct result; everything before it is an original result.
     SmallVector<SILValue, 8> vjpDirectResults;
     extractAllElements(vjpCall, getBuilder(), vjpDirectResults);
     ArrayRef<SILValue> originalDirectResults =
         ArrayRef<SILValue>(vjpDirectResults).drop_back(1);
     SILValue originalDirectResult = joinElements(originalDirectResults,
                                                  getBuilder(),
                                                  vjpCall->getLoc());
     SILValue pullback = vjpDirectResults.back();

     // Store the original result to the value map.
     mapValue(ai, originalDirectResult);

     // Checkpoint the pullback.
     auto *pullbackDecl = pullbackInfo.lookUpLinearMapDecl(ai);

     // If actual pullback type does not match lowered pullback type, reabstract
     // the pullback using a thunk.
     auto actualPullbackType =
         getOpType(pullback->getType()).getAs<SILFunctionType>();
     auto vjpGenSig = SubsMap.getGenericSignature()
         ? SubsMap.getGenericSignature()->getCanonicalSignature()
         : nullptr;
     Lowering::GenericContextScope genericContextScope(
         context.getTypeConverter(), vjpGenSig);
     auto loweredPullbackType =
         getOpType(context.getTypeConverter().getLoweredType(
                       pullbackDecl->getInterfaceType()->getCanonicalType(),
                       ResilienceExpansion::Minimal))
             .castTo<SILFunctionType>();
     if (!loweredPullbackType->isEqual(actualPullbackType)) {
       // Set non-reabstracted original pullback type in nested apply info.
       nestedApplyInfo.originalPullbackType = actualPullbackType;
       SILOptFunctionBuilder fb(context.getTransform());
       auto *thunk = getOrCreateReabstractionThunk(
           fb, getModule(), loc, /*caller*/ vjp, actualPullbackType,
           loweredPullbackType);
       auto *thunkRef = getBuilder().createFunctionRef(loc, thunk);
       pullback = getBuilder().createPartialApply(
           ai->getLoc(), thunkRef,
           getOpSubstitutionMap(thunk->getForwardingSubstitutionMap()),
           {pullback}, actualPullbackType->getCalleeConvention());
     }
     pullbackValues[ai->getParent()].push_back(pullback);

     // Some instructions that produce the callee may have been cloned.
     // If the original callee did not have any users beyond this `apply`,
     // recursively kill the cloned callee.
     if (auto *origCallee = cast_or_null<SingleValueInstruction>(
             ai->getCallee()->getDefiningInstruction()))
       if (origCallee->hasOneUse())
         recursivelyDeleteTriviallyDeadInstructions(
             getOpValue(origCallee)->getDefiningInstruction());
   }

   /// Clones a `differentiable_function` instruction from the original
   /// function into the VJP and queues the clone on the context's
   /// `differentiable_function` worklist.
   void visitDifferentiableFunctionInst(DifferentiableFunctionInst *dfi) {
     // Let the base cloner produce the copy and register its value mapping.
     TypeSubstCloner::visitDifferentiableFunctionInst(dfi);
     // Look the clone up through the value map and add it to the worklist.
     auto *clonedDFI = cast<DifferentiableFunctionInst>(getOpValue(dfi));
     auto &worklist = context.getDifferentiableFunctionInsts();
     worklist.push_back(clonedDFI);
   }
 };
 } // end anonymous namespace

 //===----------------------------------------------------------------------===//
 // AdjointValue - a symbolic representation for adjoint values that allows
 // for efficient differentiation of aggregates.
 //===----------------------------------------------------------------------===//

 namespace {
 // Forward declarations: `AdjointValue` befriends `PullbackEmitter`, and
 // `AdjointValueBase` refers to `AdjointValue` before it is defined.
 class PullbackEmitter;
 class AdjointValue;

 /// Discriminator for the three representations of an adjoint value.
 enum AdjointValueKind {
   /// An empty adjoint, i.e. zero. This case exists due to its special
   /// mathematical properties: `0 + x = x`. This is a guaranteed optimization
   /// when we combine a zero adjoint with another (e.g. differentiating a
   /// fanout).
   Zero,

   /// An aggregate of adjoint values.
   Aggregate,

   /// A concrete SIL value.
   Concrete,
 };

 class AdjointValueBase {
   friend class AdjointValue;

   /// The kind of this adjoint value.
   AdjointValueKind kind;

   /// The type of this value as if it were materialized as a SIL value.
   SILType type;

   /// The underlying value.
   union Value {
     ArrayRef<AdjointValue> aggregate;
     SILValue concrete;
     Value(ArrayRef<AdjointValue> v) : aggregate(v) {}
     Value(SILValue v) : concrete(v) {}
     Value() {}
   } value;

   explicit AdjointValueBase(SILType type,
                             ArrayRef<AdjointValue> aggregate)
       : kind(AdjointValueKind::Aggregate), type(type), value(aggregate) {}

   explicit AdjointValueBase(SILValue v)
       : kind(AdjointValueKind::Concrete), type(v->getType()), value(v) {}

   explicit AdjointValueBase(SILType type)
       : kind(AdjointValueKind::Zero), type(type) {}
 };

 /// A symbolic adjoint value: either a symbolic zero, an aggregate of other
 /// adjoint values, or a concrete (materialized) `SILValue`. This is a thin
 /// handle holding a single pointer to an `AdjointValueBase`, so it is
 /// expected to be passed around by value.
 class AdjointValue final {
   friend class PullbackEmitter;

 private:
   /// The underlying storage holding the kind, type, and payload. Null for a
   /// default-constructed handle.
   AdjointValueBase *base;
   /*implicit*/ AdjointValue(AdjointValueBase *base = nullptr) : base(base) {}

 public:
   AdjointValueBase *operator->() const { return base; }
   AdjointValueBase &operator*() const { return *base; }

   /// Creates a concrete adjoint value wrapping `value`, with storage taken
   /// from `allocator`.
   static AdjointValue createConcrete(llvm::BumpPtrAllocator &allocator,
                                      SILValue value) {
     return new (allocator.Allocate<AdjointValueBase>()) AdjointValueBase(value);
   }

   /// Creates an aggregate adjoint value of the given type. The elements are
   /// copied into memory obtained from `allocator`, so the resulting value
   /// does not reference `elements` afterwards.
   template<typename EltRange>
   static AdjointValue createAggregate(llvm::BumpPtrAllocator &allocator,
                                       SILType type, EltRange elements) {
     AdjointValue *buf = reinterpret_cast<AdjointValue *>(allocator.Allocate(
         elements.size() * sizeof(AdjointValue), alignof(AdjointValue)));
     MutableArrayRef<AdjointValue> elementsCopy(buf, elements.size());
     std::uninitialized_copy(elements.begin(), elements.end(),
                             elementsCopy.begin());
     return new (allocator.Allocate<AdjointValueBase>())
         AdjointValueBase(type, elementsCopy);
   }

   /// Creates a symbolic zero of the given type, with storage taken from
   /// `allocator`.
   static AdjointValue createZero(llvm::BumpPtrAllocator &allocator,
                                  SILType type) {
     return new (allocator.Allocate<AdjointValueBase>()) AdjointValueBase(type);
   }

   AdjointValueKind getKind() const { return base->kind; }
   SILType getType() const { return base->type; }
   CanType getSwiftType() const { return getType().getASTType(); }

   NominalTypeDecl *getAnyNominal() const {
     return getSwiftType()->getAnyNominal();
   }

   bool isZero() const { return getKind() == AdjointValueKind::Zero; }
   bool isAggregate() const { return getKind() == AdjointValueKind::Aggregate; }
   bool isConcrete() const { return getKind() == AdjointValueKind::Concrete; }

   /// Returns the number of elements. Only valid for aggregates.
   unsigned getNumAggregateElements() const {
     assert(isAggregate());
     return base->value.aggregate.size();
   }

   /// Returns the `i`-th element. Only valid for aggregates.
   AdjointValue getAggregateElement(unsigned i) const {
     assert(isAggregate());
     return base->value.aggregate[i];
   }

   /// Returns all elements. Only valid for aggregates: the payload union holds
   /// an element array only when the kind is `Aggregate`.
   ArrayRef<AdjointValue> getAggregateElements() const {
     assert(isAggregate());
     return base->value.aggregate;
   }

   /// Returns the wrapped SIL value. Only valid for concrete values.
   SILValue getConcreteValue() const {
     assert(isConcrete());
     return base->value.concrete;
   }

   /// Prints a human-readable description of this value to `s`. Aggregates are
   /// printed recursively; struct aggregates label each element with the name
   /// of the corresponding stored property.
   void print(llvm::raw_ostream &s) const {
     switch (getKind()) {
     case AdjointValueKind::Zero:
       s << "Zero";
       break;
     case AdjointValueKind::Aggregate:
       s << "Aggregate<";
       if (auto *decl =
             getType().getASTType()->getStructOrBoundGenericStruct()) {
         s << "Struct>(";
         interleave(llvm::zip(decl->getStoredProperties(),
                              base->value.aggregate),
                              [&s](std::tuple<VarDecl *,
                                              const AdjointValue &> elt) {
                                s << std::get<0>(elt)->getName() << ": ";
                                std::get<1>(elt).print(s);
                              }, [&s] { s << ", "; });
       } else if (getType().is<TupleType>()) {
         s << "Tuple>(";
         interleave(base->value.aggregate,
                    [&s](const AdjointValue &elt) { elt.print(s); },
                    [&s] { s << ", "; });
       } else {
         llvm_unreachable("Invalid aggregate");
       }
       s << ')';
       break;
     case AdjointValueKind::Concrete:
       s << "Concrete(" << base->value.concrete << ')';
       break;
     }
   }
 };

 /// Streams the textual form produced by `AdjointValue::print` and returns
 /// the stream so insertions can be chained.
 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &stream,
                                      const AdjointValue &value) {
   value.print(stream);
   return stream;
 }

 } // end anonymous namespace

 namespace {

 /// A `TypeSubstCloner` that clones the original function into its JVP.
 /// Besides the inherited cloner state, it carries the state used to generate
 /// the accompanying differential function (see the fields below).
 class JVPEmitter final
     : public TypeSubstCloner<JVPEmitter, SILOptFunctionBuilder> {
 private:
   /// The global context.
   ADContext &context;

   /// The original function.
   SILFunction *const original;

   /// The `[differentiable]` attribute.
   SILDifferentiableAttr *const attr;

   /// The JVP function.
   SILFunction *const jvp;

   /// Allocator backing the tangent values (`AdjointValue`s) created by this
   /// emitter.
   llvm::BumpPtrAllocator allocator;

   /// The differentiation invoker.
   DifferentiationInvoker invoker;

   /// Info from activity analysis on the original function.
   const DifferentiableActivityInfo &activityInfo;

   /// The differential info.
   LinearMapInfo differentialInfo;

   /// Whether an error has occurred; initially false.
   bool errorOccurred = false;

   //--------------------------------------------------------------------------//
   // Differential generation related fields
   //--------------------------------------------------------------------------//

   /// The builder for the differential function.
   SILBuilder differentialBuilder;

   /// Mapping from original basic blocks to corresponding differential basic
   /// blocks.
   DenseMap<SILBasicBlock *, SILBasicBlock *> diffBBMap;

   /// Mapping from original values to corresponding tangent values.
   DenseMap<SILValue, AdjointValue> tangentValueMap;

   /// Mapping from original basic blocks and original buffers to corresponding
   /// tangent buffers.
   DenseMap<std::pair<SILBasicBlock *, SILValue>, SILValue> bufferMap;

   /// Mapping from differential basic blocks to differential struct arguments.
   // NOTE(review): `getDifferentialStructArgument` indexes this map with an
   // original basic block - confirm which kind of block is the key.
   DenseMap<SILBasicBlock *, SILArgument *> differentialStructArguments;

   /// Mapping from differential struct field declarations to differential struct
   /// elements destructured from the linear map basic block argument. In the
   /// beginning of each differential basic block, the block's differential
   /// struct is destructured into the individual elements stored here.
   DenseMap<VarDecl *, SILValue> differentialStructElements;

   /// An auxiliary differential local allocation builder.
   SILBuilder diffLocalAllocBuilder;

   /// Stack buffers allocated for storing local tangent values.
   SmallVector<SILValue, 8> differentialLocalAllocations;

   /// Mapping from original blocks to differential values. Used to build
   /// differential struct instances.
   DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> differentialValues;

   //--------------------------------------------------------------------------//
   // Getters
   //--------------------------------------------------------------------------//

   /// The AST context of the JVP function.
   ASTContext &getASTContext() const {
     return jvp->getASTContext();
   }

   /// The SIL module containing the JVP function.
   SILModule &getModule() const {
     return jvp->getModule();
   }

   /// The differentiation indices stored on the `[differentiable]` attribute.
   const SILAutoDiffIndices &getIndices() const {
     return attr->getIndices();
   }

   /// The builder used to emit instructions into the differential function.
   SILBuilder &getDifferentialBuilder() {
     return differentialBuilder;
   }

   /// The function that the differential builder emits into.
   SILFunction &getDifferential() {
     return getDifferentialBuilder().getFunction();
   }
   /// Returns the differential struct argument recorded for `origBB`.
   ///
   /// Asserts that an argument has been recorded and that its struct type is
   /// the linear map struct declared for `origBB`. With assertions disabled, a
   /// missing entry yields `nullptr`. The map is looked up once and is never
   /// modified by this query.
   SILArgument *getDifferentialStructArgument(SILBasicBlock *origBB) {
     auto found = differentialStructArguments.find(origBB);
     assert(found != differentialStructArguments.end() &&
            "No differential struct argument recorded for this block");
     if (found == differentialStructArguments.end())
       return nullptr;
     auto *structArg = found->second;
     assert(structArg->getType().getStructOrBoundGenericStruct() ==
                differentialInfo.getLinearMapStruct(origBB) &&
            "Differential struct argument has an unexpected struct type");
     return structArg;
   }

   //--------------------------------------------------------------------------//
   // Initialization helpers
   //--------------------------------------------------------------------------//

   /// Computes the substitution map used for cloning `original` into `jvp`.
   /// When the JVP has a generic environment, the map is built from the JVP's
   /// generic signature and forwarding substitutions; otherwise the original
   /// function's forwarding substitution map is returned.
   static SubstitutionMap getSubstitutionMap(SILFunction *original,
                                             SILFunction *jvp) {
     auto origForwardingMap = original->getForwardingSubstitutionMap();
     auto *jvpGenEnv = jvp->getGenericEnvironment();
     if (!jvpGenEnv)
       return origForwardingMap;
     auto jvpForwardingMap = jvpGenEnv->getForwardingSubstitutionMap();
     return SubstitutionMap::get(
         jvpGenEnv->getGenericSignature(),
         QuerySubstitutionMap{jvpForwardingMap},
         LookUpConformanceInSubstitutionMap(jvpForwardingMap));
   }

   /// Looks up the activity info for the values of `original`, queried with
   /// the generic signature of the JVP's lowered function type and the JVP
   /// derivative kind. The info is also dumped to the AD debug stream when
   /// debug output is enabled.
   static const DifferentiableActivityInfo &getActivityInfo(
       ADContext &context, SILFunction *original,
       const SILAutoDiffIndices &indices, SILFunction *jvp) {
     // Fetch the per-function activity collection from the pass manager.
     auto *analysis = context.getPassManager()
                          .getAnalysis<DifferentiableActivityAnalysis>();
     auto &collection = *analysis->get(original);
     // Query it for the JVP's generic signature.
     auto jvpGenSig = jvp->getLoweredFunctionType()->getGenericSignature();
     auto &info = collection.getActivityInfo(
         jvpGenSig, AutoDiffDerivativeFunctionKind::JVP);
     LLVM_DEBUG(
         dumpActivityInfo(*original, indices, info, getADDebugStream()));
     return info;
   }

   //--------------------------------------------------------------------------//
   // Differential struct mapping
   //--------------------------------------------------------------------------//

   /// Records, for every stored property of the linear map struct of `origBB`,
   /// the corresponding value from `values` in `differentialStructElements`.
   /// Properties and values are paired positionally; each property may be
   /// recorded only once.
   void initializeDifferentialStructElements(SILBasicBlock *origBB,
                                             SILInstructionResultArray values) {
     auto storedFields =
         differentialInfo.getLinearMapStruct(origBB)->getStoredProperties();
     assert(storedFields.size() == values.size() &&
            "The number of differential struct fields must equal the number of "
            "differential struct element values");
     for (auto fieldAndValue : llvm::zip(storedFields, values)) {
       VarDecl *field = std::get<0>(fieldAndValue);
       SILValue element = std::get<1>(fieldAndValue);
       assert(element.getOwnershipKind() != ValueOwnershipKind::Guaranteed &&
              "Differential struct elements must be @owned");
       bool isNewEntry =
           differentialStructElements.insert({field, element}).second;
       (void)isNewEntry;
       assert(isNewEntry &&
              "A differential struct element mapping already exists!");
     }
   }

   /// Returns the value recorded for `field`, which must be a stored property
   /// of the linear map struct of `origBB`. With assertions disabled, a field
   /// without a recorded value yields a null `SILValue`.
   SILValue getDifferentialStructElement(SILBasicBlock *origBB, VarDecl *field) {
     assert(differentialInfo.getLinearMapStruct(origBB) ==
                cast<StructDecl>(field->getDeclContext()));
     auto found = differentialStructElements.find(field);
     const bool hasElement = found != differentialStructElements.end();
     assert(hasElement &&
            "Differential struct element for this field does not exist!");
     return hasElement ? found->second : SILValue();
   }

   //--------------------------------------------------------------------------//
   // General utilities
   //--------------------------------------------------------------------------//

   /// Returns the position at which the next local tangent allocation should
   /// be inserted in the differential function.
   SILBasicBlock::iterator getNextDifferentialLocalAllocationInsertionPoint() {
     // Once at least one local allocation exists, insert before the most
     // recent one. Inserting before rather than after keeps allocation and
     // zero initialization instructions grouped together.
     if (!differentialLocalAllocations.empty()) {
       auto *lastAllocInst =
           differentialLocalAllocations.back()->getDefiningInstruction();
       return lastAllocInst->getIterator();
     }
     // No local allocations yet: insert at the start of the differential's
     // entry block.
     return getDifferential().getEntryBlock()->begin();
   }

   /// Returns the lowered SIL type of `nominal`: its canonical declared
   /// interface type, remapped through `getOpASTType` and lowered with minimal
   /// resilience expansion.
   SILType getNominalDeclLoweredType(NominalTypeDecl *nominal) {
     auto declaredType = nominal->getDeclaredInterfaceType()->getCanonicalType();
     auto remappedType = getOpASTType(declaredType);
     return context.getTypeConverter().getLoweredType(
         remappedType, ResilienceExpansion::Minimal);
   }

   /// Builds, in the JVP, the differential struct value for the original block
   /// that `termInst` terminates. The struct elements are the differential
   /// values recorded for that block; for non-entry blocks they are preceded
   /// by the last argument of the corresponding JVP block.
   StructInst *buildDifferentialValueStructValue(TermInst *termInst) {
     assert(termInst->getFunction() == original);
     auto loc = termInst->getFunction()->getLocation();
     auto *origBB = termInst->getParent();
     auto *jvpBB = BBMap[origBB];
     assert(jvpBB && "Basic block mapping should exist");
     auto *diffStruct = differentialInfo.getLinearMapStruct(origBB);
     assert(diffStruct && "The differential struct should have been declared");
     auto structLoweredTy = getNominalDeclLoweredType(diffStruct);

     // Assemble the elements in a scratch vector so the values recorded in
     // `differentialValues` stay untouched.
     auto &recordedValues = differentialValues[origBB];
     SmallVector<SILValue, 8> structElements;
     if (!origBB->isEntry())
       structElements.push_back(jvpBB->getArguments().back());
     structElements.append(recordedValues.begin(), recordedValues.end());
     return getBuilder().createStruct(loc, structLoweredTy, structElements);
   }

   //--------------------------------------------------------------------------//
   // Tangent value factory methods
   //--------------------------------------------------------------------------//

   /// Creates a symbolic zero tangent value whose type is `type` remapped into
   /// the differential.
   AdjointValue makeZeroTangentValue(SILType type) {
     auto tangentType = remapSILTypeInDifferential(type);
     return AdjointValue::createZero(allocator, tangentType);
   }

   /// Wraps an already materialized SIL value as a concrete tangent value.
   AdjointValue makeConcreteTangentValue(SILValue value) {
     auto concreteTangent = AdjointValue::createConcrete(allocator, value);
     return concreteTangent;
   }

   //--------------------------------------------------------------------------//
   // Tangent materialization
   //--------------------------------------------------------------------------//

   /// Emits, into the differential, code that zero-initializes the buffer
   /// `bufferAccess` for a value of type `type`.
   ///
   /// Vector tangent spaces are initialized with `emitZeroIntoBuffer`. Tuple
   /// tangent spaces are initialized element by element through
   /// `tuple_element_addr` projections. Function tangent spaces are not
   /// implemented.
   void emitZeroIndirect(CanType type, SILValue bufferAccess,
                         SILLocation loc) {
     // Bind by reference: `getDifferentialBuilder()` returns the emitter's
     // builder, and a plain `auto` would copy it.
     auto &builder = getDifferentialBuilder();
     auto tangentSpace = getTangentSpace(type);
     assert(tangentSpace && "No tangent space for this type");
     switch (tangentSpace->getKind()) {
     case VectorSpace::Kind::Vector:
       emitZeroIntoBuffer(builder, type, bufferAccess, loc);
       return;
     case VectorSpace::Kind::Tuple: {
       auto tupleType = tangentSpace->getTuple();
       // Recurse into each element address.
       for (unsigned i : range(tupleType->getNumElements())) {
         auto eltAddr = builder.createTupleElementAddr(loc, bufferAccess, i);
         emitZeroIndirect(tupleType->getElementType(i)->getCanonicalType(),
                          eltAddr, loc);
       }
       return;
     }
     case VectorSpace::Kind::Function: {
       llvm_unreachable(
           "Unimplemented: Emit thunks for abstracting zero initialization");
     }
     }
   }

   /// Emits, into the differential, a zero value of type `type` as a direct
   /// (loaded) value: a temporary stack buffer is allocated, zero-initialized
   /// via `emitZeroIndirect`, loaded with `take`, and deallocated.
   SILValue emitZeroDirect(CanType type, SILLocation loc) {
     // Bind by reference: `getDifferentialBuilder()` returns the emitter's
     // builder, and a plain `auto` would copy it.
     auto &diffBuilder = getDifferentialBuilder();
     auto silType = getModule().Types.getLoweredLoadableType(
         type, ResilienceExpansion::Minimal, getModule());
     auto *buffer = diffBuilder.createAllocStack(loc, silType);
     emitZeroIndirect(type, buffer, loc);
     auto loaded = diffBuilder.emitLoadValueOperation(
         loc, buffer, LoadOwnershipQualifier::Take);
     diffBuilder.createDeallocStack(loc, buffer);
     return loaded;
   }

   /// Materializes the object-typed tangent value `val` as a SIL value.
   /// - Zero: emits a zero value of the tangent's Swift type.
   /// - Aggregate: not supported yet.
   /// - Concrete: returns the wrapped value.
   SILValue materializeTangentDirect(AdjointValue val, SILLocation loc) {
     assert(val.getType().isObject());
     LLVM_DEBUG(getADDebugStream()
                << "Materializing tangents for " << val << '\n');
     switch (val.getKind()) {
     case AdjointValueKind::Zero:
       return emitZeroDirect(val.getSwiftType(), loc);
     case AdjointValueKind::Aggregate:
       llvm_unreachable(
           "Tuples and structs are not supported in forward mode yet.");
     case AdjointValueKind::Concrete:
       return val.getConcreteValue();
     }
     // The switch above is fully covered; this keeps the function from falling
     // off its end for compilers that cannot prove exhaustiveness.
     llvm_unreachable("Invalid adjoint value kind");
   }

   /// Returns a SIL value for the tangent `val`: the wrapped value when `val`
   /// is concrete, otherwise the result of `materializeTangentDirect`.
   SILValue materializeTangent(AdjointValue val, SILLocation loc) {
     if (!val.isConcrete()) {
       LLVM_DEBUG(getADDebugStream() << "Materializing tangent: Value is "
                                        "non-concrete. Materializing directly.\n");
       return materializeTangentDirect(val, loc);
     }
     LLVM_DEBUG(getADDebugStream()
                << "Materializing tangent: Value is concrete.\n");
     return val.getConcreteValue();
   }

   //--------------------------------------------------------------------------//
   // Tangent buffer mapping
   //--------------------------------------------------------------------------//

   /// Records `tangentBuffer` as the tangent buffer of the address-typed
   /// `originalBuffer` in block `origBB`. Asserts that no entry exists yet.
   void setTangentBuffer(SILBasicBlock *origBB, SILValue originalBuffer,
                         SILValue tangentBuffer) {
     assert(originalBuffer->getType().isAddress());
     bool inserted =
         bufferMap.try_emplace({origBB, originalBuffer}, tangentBuffer).second;
     assert(inserted && "tangent buffer already exists.");
     (void)inserted;
   }

   /// Returns a reference to the `bufferMap` entry for the address-typed
   /// `originalBuffer` in block `origBB`. The entry is expected to exist
   /// already (asserted); if it does not, `try_emplace` inserts an empty
   /// `SILValue` for the key.
   SILValue &getTangentBuffer(SILBasicBlock *origBB, SILValue originalBuffer) {
     assert(originalBuffer->getType().isAddress());
     assert(originalBuffer->getFunction() == original);
     auto lookup = bufferMap.try_emplace({origBB, originalBuffer}, SILValue());
     auto &entry = *lookup.first;
     assert(!lookup.second && "tangent buffer should already exist");
     return entry.getSecond();
   }

   //--------------------------------------------------------------------------//
   // Differential type calculations
   //--------------------------------------------------------------------------//

   /// Returns `substMap` with its replacement types substituted using the
   /// differential function's forwarding substitution map.
   SubstitutionMap remapSubstitutionMapInDifferential(SubstitutionMap substMap) {
     auto diffSubstMap = getDifferential().getForwardingSubstitutionMap();
     return substMap.subst(diffSubstMap);
   }

   /// Maps `ty` into the differential function's context. A type that
   /// contains archetypes is first mapped out of its current context.
   Type remapTypeInDifferential(Type ty) {
     auto &differential = getDifferential();
     Type typeToMap = ty->hasArchetype() ? ty->mapTypeOutOfContext() : ty;
     return differential.mapTypeIntoContext(typeToMap);
   }

   /// Maps the SIL type `ty` into the differential function's context. A type
   /// that contains archetypes is first mapped out of its current context.
   SILType remapSILTypeInDifferential(SILType ty) {
     auto &differential = getDifferential();
     SILType typeToMap = ty.hasArchetype() ? ty.mapTypeOutOfContext() : ty;
     return differential.mapTypeIntoContext(typeToMap);
   }

   /// Looks up the tangent space associated with the canonical type `type`,
   /// resolving conformances in the current Swift module.
   Optional<VectorSpace> getTangentSpace(CanType type) {
     auto *swiftModule = getModule().getSwiftModule();
     LookUpConformanceInModule lookUpConformance(swiftModule);
     return type->getAutoDiffAssociatedTangentSpace(lookUpConformance);
   }

   /// Returns the tangent space SIL type of `type` after remapping it into the
   /// differential's context. The result keeps `type`'s category. The remapped
   /// type is assumed to have a tangent space (the lookup result is
   /// dereferenced unconditionally).
   SILType getRemappedTangentType(SILType type) {
     auto remappedASTType = remapSILTypeInDifferential(type).getASTType();
     auto tangentASTType = getTangentSpace(remappedASTType)->getCanonicalType();
     return SILType::getPrimitiveType(tangentASTType, type.getCategory());
   }

   //--------------------------------------------------------------------------//
   // Tangent value mapping
   //--------------------------------------------------------------------------//

   /// Returns the tangent of an object-typed value from the original function.
   ///
   /// Looks up `tangentValueMap`; when no entry exists, a zero tangent of the
   /// remapped tangent type is inserted and returned.
   AdjointValue getTangentValue(SILValue originalValue) {
     assert(originalValue->getType().isObject());
     assert(originalValue->getFunction() == original);
     auto tangentType = getRemappedTangentType(originalValue->getType());
     auto zeroTangent = makeZeroTangentValue(tangentType);
     auto lookup = tangentValueMap.try_emplace(originalValue, zeroTangent);
     return lookup.first->getSecond();
   }

   /// Map the tangent value to the given original value.
   ///
   /// Both values must be object-typed, `originalValue` must belong to the
   /// original function, and `newTangentValue` must have the remapped tangent
   /// type of `originalValue`. Asserts that no tangent was recorded before.
   /// `origBB` is not used by the body.
   void setTangentValue(SILBasicBlock *origBB, SILValue originalValue,
                        AdjointValue newTangentValue) {
     if (auto *defInst = originalValue->getDefiningInstruction()) {
       bool isTupleTypedApplyResult =
           isa<ApplyInst>(defInst) && originalValue->getType().is<TupleType>();
       assert(!isTupleTypedApplyResult &&
              "Should not set tangent value for tuple-typed result from `apply` "
              "instruction; use `destructure_tuple` on `apply` result and set "
              "tangent value for `destructure_tuple` results instead.");
       // Only read by the assertion above; silence release-build warnings.
       (void)isTupleTypedApplyResult;
     }
     assert(originalValue->getType().isObject());
     assert(newTangentValue.getType().isObject());
     assert(originalValue->getFunction() == original);
     LLVM_DEBUG(getADDebugStream() << "Adding tangent for " << originalValue);
     // The tangent value must be in the tangent space.
     assert(newTangentValue.getType() ==
            getRemappedTangentType(originalValue->getType()));
     auto insertion =
         tangentValueMap.try_emplace(originalValue, newTangentValue);
     auto inserted = insertion.second;
     assert(inserted && "The tangent value should not already exist.");
     // Only read by the assertion above; silence release-build warnings.
     (void)inserted;
   }

   //--------------------------------------------------------------------------//
   // Tangent emission helpers
   //--------------------------------------------------------------------------//
 public:
 /// Defines `visit<INST>Inst`, which first clones the instruction through
 /// `TypeSubstCloner::visit<INST>Inst` and then, if `differentialInfo` reports
 /// that the instruction should be differentiated, calls
 /// `emitTangentFor<INST>Inst`. The expansion ends with the signature of
 /// `emitTangentFor<INST>Inst`, so every use must be followed by that
 /// function's body; `ID` names its instruction parameter.
 #define CLONE_AND_EMIT_TANGENT(INST, ID) \
   void visit##INST##Inst(INST##Inst *inst) { \
     TypeSubstCloner::visit##INST##Inst(inst); \
     if (differentialInfo.shouldDifferentiateInstruction(inst)) \
       emitTangentFor##INST##Inst(inst); \
   } \
   void emitTangentFor##INST##Inst(INST##Inst *(ID))

   /// Handle `begin_borrow` instruction: materialize the operand's tangent,
   /// emit a begin-borrow operation on it, and record the borrowed value as the
   /// tangent of the original result.
   CLONE_AND_EMIT_TANGENT(BeginBorrow, bbi) {
     auto loc = bbi->getLoc();
     auto operandTangent = getTangentValue(bbi->getOperand());
     auto materialized = materializeTangent(operandTangent, loc);
     auto &diffBuilder = getDifferentialBuilder();
     auto borrowed = diffBuilder.emitBeginBorrowOperation(loc, materialized);
     auto borrowedTangent = makeConcreteTangentValue(borrowed);
     setTangentValue(bbi->getParent(), bbi, borrowedTangent);
   }

   /// Handle `end_borrow` instruction: materialize the operand's tangent and
   /// emit an end-borrow operation on it.
   CLONE_AND_EMIT_TANGENT(EndBorrow, ebi) {
     auto loc = ebi->getLoc();
     auto operandTangent = getTangentValue(ebi->getOperand());
     auto materialized = materializeTangent(operandTangent, loc);
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.emitEndBorrowOperation(loc, materialized);
   }

   /// Handle `destroy_value` instruction: materialize the operand's tangent
   /// and emit a destroy of it.
   CLONE_AND_EMIT_TANGENT(DestroyValue, dvi) {
     auto loc = dvi->getLoc();
     auto operandTangent = getTangentValue(dvi->getOperand());
     auto materialized = materializeTangent(operandTangent, loc);
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.emitDestroyValue(loc, materialized);
   }

   /// Handle `copy_value` instruction: materialize the operand's tangent, emit
   /// a copy of it, and record the copy as the tangent of the original result.
   CLONE_AND_EMIT_TANGENT(CopyValue, cvi) {
     auto loc = cvi->getLoc();
     auto operandTangent = getTangentValue(cvi->getOperand());
     auto materialized = materializeTangent(operandTangent, loc);
     auto &diffBuilder = getDifferentialBuilder();
     auto copied = diffBuilder.emitCopyValueOperation(loc, materialized);
     setTangentValue(cvi->getParent(), cvi, makeConcreteTangentValue(copied));
   }

   /// Handle `load` instruction.
   ///   Original: y = load x
   ///    Tangent: tan[y] = load tan[x]
   /// The tangent load reuses the original load's ownership qualifier.
   CLONE_AND_EMIT_TANGENT(Load, li) {
     auto *origBB = li->getParent();
     auto tangentBuffer = getTangentBuffer(origBB, li->getOperand());
     auto &diffBuilder = getDifferentialBuilder();
     auto loadedTangent = diffBuilder.emitLoadValueOperation(
         li->getLoc(), tangentBuffer, li->getOwnershipQualifier());
     setTangentValue(origBB, li, makeConcreteTangentValue(loadedTangent));
   }

   /// Handle `load_borrow` instruction.
   ///   Original: y = load_borrow x
   ///    Tangent: tan[y] = load_borrow tan[x]
   CLONE_AND_EMIT_TANGENT(LoadBorrow, lbi) {
     auto *origBB = lbi->getParent();
     auto tangentBuffer = getTangentBuffer(origBB, lbi->getOperand());
     auto &diffBuilder = getDifferentialBuilder();
     auto borrowedTangent =
         diffBuilder.emitLoadBorrowOperation(lbi->getLoc(), tangentBuffer);
     setTangentValue(origBB, lbi, makeConcreteTangentValue(borrowedTangent));
   }

   /// Handle `store` instruction in the differential.
   ///   Original: store x to y
   ///    Tangent: store tan[x] to tan[y]
   /// The tangent store reuses the original store's ownership qualifier.
   CLONE_AND_EMIT_TANGENT(Store, si) {
     auto loc = si->getLoc();
     auto sourceTangent = materializeTangent(getTangentValue(si->getSrc()), loc);
     auto &destTangentBuffer = getTangentBuffer(si->getParent(), si->getDest());
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.emitStoreValueOperation(loc, sourceTangent, destTangentBuffer,
                                         si->getOwnershipQualifier());
   }

   /// Handle `store_borrow` instruction in the differential.
   ///   Original: store_borrow x to y
   ///    Tangent: store_borrow tan[x] to tan[y]
   CLONE_AND_EMIT_TANGENT(StoreBorrow, sbi) {
     auto loc = sbi->getLoc();
     auto sourceTangent =
         materializeTangent(getTangentValue(sbi->getSrc()), loc);
     auto &destTangentBuffer =
         getTangentBuffer(sbi->getParent(), sbi->getDest());
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.createStoreBorrow(loc, sourceTangent, destTangentBuffer);
   }

   /// Handle `copy_addr` instruction.
   ///   Original: copy_addr x to y
   ///    Tangent: copy_addr tan[x] to tan[y]
   /// The tangent copy keeps the original's take/initialization flags.
   CLONE_AND_EMIT_TANGENT(CopyAddr, cai) {
     // Keep the differential's generic signature active on the type converter
     // for the duration of this handler.
     auto *diffGenEnv = getDifferential().getGenericEnvironment();
     auto diffGenSig = diffGenEnv
         ? diffGenEnv->getGenericSignature()->getCanonicalSignature()
         : nullptr;
     Lowering::GenericContextScope genericContextScope(
         context.getTypeConverter(), diffGenSig);

     auto diffBuilder = getDifferentialBuilder();
     auto loc = cai->getLoc();
     auto *bb = cai->getParent();
     // Copy the buffers out of `bufferMap` by value: `getTangentBuffer` goes
     // through `try_emplace`, so a reference obtained from the first lookup
     // must not be held across the second one.
     SILValue tanSrc = getTangentBuffer(bb, cai->getSrc());
     SILValue tanDest = getTangentBuffer(bb, cai->getDest());

     diffBuilder.createCopyAddr(loc, tanSrc, tanDest, cai->isTakeOfSrc(),
                                cai->isInitializationOfDest());
   }

   /// Handle `unconditional_checked_cast_addr` instruction.
   ///   Original: unconditional_checked_cast_addr $X in x to $Y in y
   ///    Tangent: unconditional_checked_cast_addr $X.Tan in tan[x]
   ///                                          to $Y.Tan in tan[y]
   /// The source/target formal types are taken from the tangent buffers' types.
   CLONE_AND_EMIT_TANGENT(UnconditionalCheckedCastAddr, uccai) {
     auto diffBuilder = getDifferentialBuilder();
     auto loc = uccai->getLoc();
     auto *bb = uccai->getParent();
     // Copy the buffers out of `bufferMap` by value: `getTangentBuffer` goes
     // through `try_emplace`, so a reference obtained from the first lookup
     // must not be held across the second one.
     SILValue tanSrc = getTangentBuffer(bb, uccai->getSrc());
     SILValue tanDest = getTangentBuffer(bb, uccai->getDest());

     diffBuilder.createUnconditionalCheckedCastAddr(
         loc, tanSrc, tanSrc->getType().getASTType(), tanDest,
         tanDest->getType().getASTType());
   }

   /// Handle `begin_access` instruction (and do differentiability checks).
   ///   Original: y = begin_access x
   ///    Tangent: tan[y] = begin_access tan[x]
   ///
   /// A `modify` access whose source is a `global_addr` or `project_box` is
   /// diagnosed as non-differentiable; `errorOccurred` is set and no tangent
   /// is emitted.
   CLONE_AND_EMIT_TANGENT(BeginAccess, bai) {
     // Check for non-differentiable writes.
     if (bai->getAccessKind() == SILAccessKind::Modify) {
       // Only the kind of the source matters here, so use `isa` rather than
       // binding an unused `dyn_cast` result.
       if (isa<GlobalAddrInst>(bai->getSource())) {
         context.emitNondifferentiabilityError(bai, invoker,
             diag::autodiff_cannot_differentiate_writes_to_global_variables);
         errorOccurred = true;
         return;
       }
       if (isa<ProjectBoxInst>(bai->getSource())) {
         context.emitNondifferentiabilityError(bai, invoker,
             diag::autodiff_cannot_differentiate_writes_to_mutable_captures);
         errorOccurred = true;
         return;
       }
     }

     auto &diffBuilder = getDifferentialBuilder();
     auto *bb = bai->getParent();

     // Mirror the access on the tangent buffer with the same access attributes.
     auto tanSrc = getTangentBuffer(bb, bai->getSource());
     auto *tanDest = diffBuilder.createBeginAccess(
         bai->getLoc(), tanSrc, bai->getAccessKind(), bai->getEnforcement(),
         bai->hasNoNestedConflict(), bai->isFromBuiltin());
     setTangentBuffer(bb, bai, tanDest);
   }

   /// Handle `end_access` instruction.
   ///   Original: end_access x
   ///    Tangent: end_access tan[x]
   /// The tangent `end_access` keeps the original's aborting flag.
   CLONE_AND_EMIT_TANGENT(EndAccess, eai) {
     auto &diffBuilder = getDifferentialBuilder();
     auto *bb = eai->getParent();
     auto loc = eai->getLoc();
     auto tanSrc = getTangentBuffer(bb, eai->getOperand());
     diffBuilder.createEndAccess(loc, tanSrc, eai->isAborting());
   }

   /// Handle `alloc_stack` instruction.
   ///   Original: y = alloc_stack $T
   ///    Tangent: tan[y] = alloc_stack $T.Tangent
   /// The tangent allocation carries over the original's variable info and is
   /// recorded in `bufferMap` for the original allocation.
   CLONE_AND_EMIT_TANGENT(AllocStack, asi) {
     auto tangentElementType = getRemappedTangentType(asi->getElementType());
     auto &diffBuilder = getDifferentialBuilder();
     auto *tangentAllocStack = diffBuilder.createAllocStack(
         asi->getLoc(), tangentElementType, asi->getVarInfo());
     bufferMap.try_emplace({asi->getParent(), asi}, tangentAllocStack);
   }

   /// Handle `dealloc_stack` instruction.
   ///   Original: dealloc_stack x
   ///    Tangent: dealloc_stack tan[x]
   CLONE_AND_EMIT_TANGENT(DeallocStack, dsi) {
     auto tangentBuffer = getTangentBuffer(dsi->getParent(), dsi->getOperand());
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.createDeallocStack(dsi->getLoc(), tangentBuffer);
   }

   /// Handle `destroy_addr` instruction.
   ///   Original: destroy_addr x
   ///    Tangent: destroy_addr tan[x]
   CLONE_AND_EMIT_TANGENT(DestroyAddr, dai) {
     auto tangentBuffer = getTangentBuffer(dai->getParent(), dai->getOperand());
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.createDestroyAddr(dai->getLoc(), tangentBuffer);
   }

   /// Handle `struct` instruction.
   ///   Original: y = struct $T (x0, x1, x2, ...)
   ///    Tangent: tan[y] = struct $T.Tangent (tan[x0], tan[x1], tan[x2], ...)
   CLONE_AND_EMIT_TANGENT(Struct, si) {
     auto &diffBuilder = getDifferentialBuilder();
     auto loc = si->getLoc();
     SmallVector<SILValue, 4> tangentElements;
     // Materialize every element tangent instead of calling
     // `getConcreteValue()` directly: `getTangentValue` yields a zero tangent
     // for values without a recorded tangent, and a zero tangent has no
     // concrete value to extract.
     for (auto elem : si->getElements())
       tangentElements.push_back(
           materializeTangent(getTangentValue(elem), loc));
     auto tanStruct = diffBuilder.createStruct(
         loc, getRemappedTangentType(si->getType()), tangentElements);
     setTangentValue(si->getParent(), si, makeConcreteTangentValue(tanStruct));
   }

   /// Handle `struct_extract` instruction.
   ///   Original: y = struct_extract x, #field
   ///    Tangent: tan[y] = struct_extract tan[x], #field'
   ///                                             ^~~~~~~
   ///                          field in tangent space corresponding to #field
   ///
   /// If the tangent struct has no member named like the original field, a
   /// non-differentiability error is emitted and `errorOccurred` is set.
   CLONE_AND_EMIT_TANGENT(StructExtract, sei) {
     assert(!sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
            "`struct_extract` with `@noDerivative` field should not be "
            "differentiated; activity analysis should not marked as varied.");

     auto diffBuilder = getDifferentialBuilder();
     auto tangentVectorTy =
         getRemappedTangentType(sei->getOperand()->getType());
     auto *tangentVectorDecl =
         tangentVectorTy.getStructOrBoundGenericStruct();

     // Find the corresponding field in the tangent space.
     VarDecl *tanField = nullptr;
     // If the tangent space is the original struct, then field is the same.
     if (tangentVectorDecl == sei->getStructDecl())
       tanField = sei->getField();
     // Otherwise, look up the field by name.
     else {
       auto tanFieldLookup =
           tangentVectorDecl->lookupDirect(sei->getField()->getName());
       if (tanFieldLookup.empty()) {
         context.emitNondifferentiabilityError(
             sei, invoker,
             diag::autodiff_stored_property_no_corresponding_tangent,
             sei->getStructDecl()->getNameStr(),
             sei->getField()->getNameStr());
         errorOccurred = true;
         return;
       }
       tanField = cast<VarDecl>(tanFieldLookup.front());
     }
     // Emit tangent `struct_extract`.
     auto tanStruct =
         materializeTangent(getTangentValue(sei->getOperand()), sei->getLoc());
     auto tangentInst =
         diffBuilder.createStructExtract(sei->getLoc(), tanStruct, tanField);
     // Update tangent value mapping for `struct_extract` result.
     auto tangentResult = makeConcreteTangentValue(tangentInst);
     setTangentValue(sei->getParent(), sei, tangentResult);
   }

   /// Handle `struct_element_addr` instruction.
   ///   Original: y = struct_element_addr x, #field
   ///    Tangent: tan[y] = struct_element_addr tan[x], #field'
   ///                                                  ^~~~~~~
   ///                          field in tangent space corresponding to #field
   ///
   /// If the tangent struct has no member named like the original field, a
   /// non-differentiability error is emitted and `errorOccurred` is set.
   CLONE_AND_EMIT_TANGENT(StructElementAddr, seai) {
     assert(!seai->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
            "`struct_element_addr` with `@noDerivative` field should not be "
            "differentiated; activity analysis should not marked as varied.");

     auto diffBuilder = getDifferentialBuilder();
     auto *origBB = seai->getParent();
     auto tangentStructTy =
         getRemappedTangentType(seai->getOperand()->getType());
     auto *tangentStructDecl = tangentStructTy.getStructOrBoundGenericStruct();

     // Resolve the tangent-space field that corresponds to the original one.
     VarDecl *tangentField = nullptr;
     if (tangentStructDecl != seai->getStructDecl()) {
       // The tangent space is a different struct: find the field by name.
       auto candidates =
           tangentStructDecl->lookupDirect(seai->getField()->getName());
       if (candidates.empty()) {
         context.emitNondifferentiabilityError(
             seai, invoker,
             diag::autodiff_stored_property_no_corresponding_tangent,
             seai->getStructDecl()->getNameStr(),
             seai->getField()->getNameStr());
         errorOccurred = true;
         return;
       }
       tangentField = cast<VarDecl>(candidates.front());
     } else {
       // The struct is its own tangent space: the field is unchanged.
       tangentField = seai->getField();
     }

     // Project the field out of the operand's tangent buffer and record the
     // projection as the tangent buffer of the original projection.
     auto operandTangentBuffer = getTangentBuffer(origBB, seai->getOperand());
     auto tangentFieldAddr = diffBuilder.createStructElementAddr(
         seai->getLoc(), operandTangentBuffer, tangentField);
     setTangentBuffer(origBB, seai, tangentFieldAddr);
   }

   /// Handle `tuple` instruction.
   ///   Original: y = tuple (x0, x1, x2, ...)
   ///    Tangent: tan[y] = tuple (tan[x0], tan[x1], tan[x2], ...)
   CLONE_AND_EMIT_TANGENT(Tuple, ti) {
     auto diffBuilder = getDifferentialBuilder();
     auto loc = ti->getLoc();

     // Materialize the tangent of each element, in element order.
     SmallVector<SILValue, 8> elementTangents;
     for (auto elem : ti->getElements()) {
       auto elemTangent = getTangentValue(elem);
       elementTangents.push_back(materializeTangent(elemTangent, loc));
     }

     // Build the tangent tuple and map it to the original tuple.
     auto tangentTuple = diffBuilder.createTuple(loc, elementTangents);
     setTangentValue(ti->getParent(), ti,
                     makeConcreteTangentValue(tangentTuple));
   }

   /// Handle `tuple_extract` instruction.
   ///   Original: y = tuple_extract x, <n>
   ///    Tangent: tan[y] = tuple_extract tan[x], <n'>
   ///                                            ^~~~
   ///                         tuple tangent space index corresponding to n
   CLONE_AND_EMIT_TANGENT(TupleExtract, tei) {
     auto &diffBuilder = getDifferentialBuilder();
     auto loc = tei->getLoc();
     auto origTupleTy = tei->getOperand()->getType().castTo<TupleType>();
     // The tangent tuple only has elements for original elements that have a
     // tangent space, so count those preceding the extracted field.
     unsigned tanIndex = 0;
     for (unsigned i : range(tei->getFieldNo())) {
       if (getTangentSpace(
               origTupleTy->getElement(i).getType()->getCanonicalType()))
         ++tanIndex;
     }
     auto tanType = getRemappedTangentType(tei->getType());
     auto tanSource = materializeTangent(
         getTangentValue(tei->getOperand()), loc);
     SILValue tanResult;
     // If the tangent of the source does not have a tuple type, then it must
     // represent a "single element tuple type". Use it directly.
     if (!tanSource->getType().is<TupleType>()) {
       tanResult = tanSource;
     } else {
       tanResult =
           diffBuilder.createTupleExtract(loc, tanSource, tanIndex, tanType);
     }
     // `tuple_extract` produces an object value, so its tangent belongs in the
     // tangent value map (read by `getTangentValue`), not in the address-only
     // tangent buffer map.
     setTangentValue(tei->getParent(), tei,
                     makeConcreteTangentValue(tanResult));
   }

   /// Handle `tuple_element_addr` instruction.
   ///   Original: y = tuple_element_addr x, <n>
   ///    Tangent: tan[y] = tuple_element_addr tan[x], <n'>
   ///                                                ^~~~
   ///                            tuple tangent space index corresponding to n
   CLONE_AND_EMIT_TANGENT(TupleElementAddr, teai) {
     auto &diffBuilder = getDifferentialBuilder();
     auto origTupleTy = teai->getOperand()->getType().castTo<TupleType>();

     // Count the preceding original elements that have a tangent space; that
     // count is the element's index in the tangent tuple.
     unsigned tangentIndex = 0;
     for (unsigned i : range(teai->getFieldNo())) {
       auto eltType = origTupleTy->getElement(i).getType()->getCanonicalType();
       if (getTangentSpace(eltType))
         ++tangentIndex;
     }

     auto tangentEltType = getRemappedTangentType(teai->getType());
     auto sourceTangentBuffer =
         getTangentBuffer(teai->getParent(), teai->getOperand());

     // A non-tuple source tangent buffer represents a "single element tuple
     // type" and is used directly; otherwise project the element address.
     SILValue eltTangentBuffer = sourceTangentBuffer;
     if (sourceTangentBuffer->getType().is<TupleType>()) {
       eltTangentBuffer = diffBuilder.createTupleElementAddr(
           teai->getLoc(), sourceTangentBuffer, tangentIndex, tangentEltType);
     }
     bufferMap.try_emplace({teai->getParent(), teai}, eltTangentBuffer);
   }

   /// Handle `destructure_tuple` instruction.
   ///   Original: (y0, y1, ...)  = destructure_tuple x
   ///    Tangent: (tan[y0], tan[y1], ...) = destructure_tuple tan[x]
   ///
   /// Results of the tangent `destructure_tuple` are mapped to the original
   /// results at the same result index.
   CLONE_AND_EMIT_TANGENT(DestructureTuple, dti) {
     auto &diffBuilder = getDifferentialBuilder();
     auto *bb = dti->getParent();
     auto loc = dti->getLoc();

     // Sanity check: at least one original result must be active.
     bool hasActiveResult = false;
     for (auto result : dti->getResults()) {
       if (activityInfo.isActive(result, getIndices())) {
         hasActiveResult = true;
         break;
       }
     }
     assert(hasActiveResult &&
            "original 'destructure_tuple' should have at least one active "
            "result");
     // Only read by the assertion above; silence release-build warnings.
     (void)hasActiveResult;

     auto tanTuple =
         materializeTangent(getTangentValue(dti->getOperand()), loc);
     auto *tupleElements = diffBuilder.createDestructureTuple(loc, tanTuple);
     for (auto i : range(tupleElements->getNumResults())) {
       auto origElem = dti->getResult(i);
       auto tanElem = tupleElements->getResult(i);
       setTangentValue(bb, origElem, makeConcreteTangentValue(tanElem));
     }
   }

 #undef CLONE_AND_EMIT_TANGENT

   /// Handle `apply` instruction.
   ///   Original: y = apply f(x0, x1, ...)
   ///    Tangent: tan[y] = apply diff_f(tan[x0], tan[x1], ...)
   ///
   /// \param ai the original `apply`; `differentialInfo` must say it should be
   ///        differentiated.
   /// \param actualIndices indices whose `source` selects the original result
   ///        that receives the tangent.
   /// \param originalDifferentialType the differential's original function
   ///        type; if it differs from the stored differential's type, the
   ///        differential is wrapped in a reabstraction thunk before the call.
   void emitTangentForApplyInst(ApplyInst *ai,
                                const SILAutoDiffIndices &actualIndices,
                                CanSILFunctionType originalDifferentialType) {
     assert(differentialInfo.shouldDifferentiateApplyInst(ai));
     auto *bb = ai->getParent();
     auto loc = ai->getLoc();
     auto &diffBuilder = getDifferentialBuilder();

     // Get the differential value.
     auto *field = differentialInfo.lookUpLinearMapDecl(ai);
     assert(field);
     SILValue differential = getDifferentialStructElement(bb, field);
     auto differentialType = remapSILTypeInDifferential(differential->getType())
         .castTo<SILFunctionType>();

     // Get the differential arguments.
     SmallVector<SILValue, 8> diffArgs;
     // Temporary stack buffers holding zero tangents for inactive indirect
     // arguments; they are deallocated after the differential call.
     SmallVector<SILValue, 2> zeroTangentBuffers;

     for (auto indRes : ai->getIndirectSILResults())
       diffArgs.push_back(getTangentBuffer(bb, indRes));

     auto paramArgs = ai->getArgumentsWithoutIndirectResults();
     // Get the tangent value of the original arguments.
     for (auto i : indices(paramArgs)) {
       auto origArg = paramArgs[i];
       // If the argument is not active:
       // - Skip the element, if it is not differentiable.
       // - Otherwise, add a zero value to that location.
       if (!activityInfo.isActive(origArg, getIndices())) {
         auto origCalleeType = ai->getSubstCalleeType();
         if (!origCalleeType->isDifferentiable())
           continue;
         auto actualOrigCalleeIndices =
             origCalleeType->getDifferentiationParameterIndices();
         if (actualOrigCalleeIndices->contains(i)) {
           SILValue tanParam;
           if (origArg->getType().isObject()) {
             tanParam = emitZeroDirect(
                 getRemappedTangentType(origArg->getType()).getASTType(), loc);
           } else {
             tanParam = diffBuilder.createAllocStack(
                 loc, getRemappedTangentType(origArg->getType()));
             emitZeroIndirect(
                 getRemappedTangentType(origArg->getType()).getASTType(), tanParam,
                 loc);
             zeroTangentBuffers.push_back(tanParam);
           }
           // Pass the zero tangent for both direct and indirect arguments so
           // that the differential receives one tangent per differentiation
           // parameter.
           diffArgs.push_back(tanParam);
         }
       }
       // Otherwise, if the argument is active, handle the argument normally by
       // getting its tangent value.
       else {
         SILValue tanParam;
         if (origArg->getType().isObject()) {
           tanParam = materializeTangent(getTangentValue(origArg), loc);
         } else {
           tanParam = getTangentBuffer(ai->getParent(), origArg);
         }
         diffArgs.push_back(tanParam);
         if (errorOccurred)
           return;
       }
     }

     // If callee differential was reabstracted in JVP, reabstract the callee
     // differential.
     if (!differentialType->isEqual(originalDifferentialType)) {
       SILOptFunctionBuilder fb(context.getTransform());
       auto *thunk = getOrCreateReabstractionThunk(
           fb, context.getModule(), loc, &getDifferential(),
           differentialType, originalDifferentialType);
       auto *thunkRef = diffBuilder.createFunctionRef(loc, thunk);
       differential = diffBuilder.createPartialApply(
          loc, thunkRef,
          remapSubstitutionMapInDifferential(
              thunk->getForwardingSubstitutionMap()),
          {differential}, differentialType->getCalleeConvention());
     }

     // Call the differential.
     auto *differentialCall = diffBuilder.createApply(
         loc, differential, SubstitutionMap(), diffArgs,
         /*isNonThrowing*/ false);
     diffBuilder.emitDestroyValueOperation(loc, differential);
     assert(differentialCall->getNumResults() == 1 &&
            "Expected differential to return one result");

     // Release the temporary zero tangent buffers in reverse allocation order.
     // NOTE(review): no `destroy_addr` is emitted for these buffers; confirm
     // whether the differential consumes its indirect arguments.
     while (!zeroTangentBuffers.empty())
       diffBuilder.createDeallocStack(loc, zeroTangentBuffers.pop_back_val());

     // Get the original results of the `apply` instructions.
     SmallVector<SILValue, 8> origDirectResults;
     forEachApplyDirectResult(ai, [&](SILValue directResult) {
       origDirectResults.push_back(directResult);
     });
     SmallVector<SILValue, 8> origAllResults;
     collectAllActualResultsInTypeOrder(ai, origDirectResults, origAllResults);
     auto origResult = origAllResults[actualIndices.source];

     // Get the differential results of the `apply` instructions.
     SmallVector<SILValue, 8> differentialDirectResults;
     forEachApplyDirectResult(differentialCall, [&](SILValue directResult) {
       differentialDirectResults.push_back(directResult);
     });
     SmallVector<SILValue, 8> differentialAllResults;
     collectAllActualResultsInTypeOrder(differentialCall,
                                        differentialDirectResults,
                                        differentialAllResults);
     auto differentialResult = differentialAllResults.front();

     // Add tangent for original result.
     if (origResult->getType().isObject()) {
       if (!origResult->getType().is<TupleType>()) {
         setTangentValue(bb, origResult,
             makeConcreteTangentValue(differentialResult));
       } else if (auto *dti = getSingleDestructureTupleUser(ai)) {
         // For a tuple-typed result, the tangent is attached to the single
         // active `destructure_tuple` result instead of the tuple itself.
         bool notSetValue = true;
         for (auto result : dti->getResults()) {
           if (activityInfo.isActive(result, getIndices())) {
             assert(notSetValue &&
                    "This was incorrectly set, should only have one active "
                    "result from the tuple.");
             notSetValue = false;
             setTangentValue(bb, result,
                             makeConcreteTangentValue(differentialResult));
           }
         }
       }
     }
   }

   /// Generate a `return` instruction in the current differential basic block.
   ///
   /// The returned value is the materialized tangent of the (at most one)
   /// active direct result of the original function. If no direct result is
   /// active but direct results exist, a zero tangent for the result selected
   /// by `getIndices().source` is returned instead.
   void emitReturnInstForDifferential() {
     auto &differential = getDifferential();
     auto diffLoc = differential.getLocation();
     auto &diffBuilder = getDifferentialBuilder();

     SmallVector<SILValue, 2> originalResults;
     collectAllDirectResultsInTypeOrder(*original, originalResults);

     // Collect the active direct results of the original function.
     SmallVector<SILValue, 2> activeResults;
     for (auto origResult : originalResults) {
       assert(origResult->getType().isObject() &&
              "Should only be handling direct results for 'return' "
              "instruction.");
       if (activityInfo.isActive(origResult, getIndices()))
         activeResults.push_back(origResult);
     }
     assert(activeResults.size() <= 1);

     // The materialized elements to return.
     SmallVector<SILValue, 8> retElts;
     if (!activeResults.empty()) {
       // Return the tangent of the active result.
       auto tangent = getTangentValue(activeResults.front());
       retElts.push_back(materializeTangent(tangent, diffLoc));
     } else if (!originalResults.empty()) {
       // No active result: return a zero tangent for the source result.
       auto origResult = originalResults[getIndices().source];
       assert(origResult->getType().isObject() &&
              "Should only be handling direct results for 'return' "
              "instruction.");
       auto origASTType = origResult->getType().getASTType();
       auto tangentASTType = getTangentSpace(origASTType)->getCanonicalType();
       retElts.push_back(emitZeroDirect(tangentASTType, diffLoc));
     }

     auto returnValue = joinElements(retElts, diffBuilder, diffLoc);
     diffBuilder.createReturn(diffLoc, returnValue);
   }

 private:

   /// Set up the differential function. This includes:
   /// - Creating all differential blocks.
   /// - Creating differential entry block arguments based on the function type.
   /// - Creating tangent value mapping for original/differential parameters.
   /// - Checking for unvaried result and emitting related warnings.
   void prepareForDifferentialGeneration() {
     // Create differential blocks and arguments.
     auto *diffGenEnv = getDifferential().getGenericEnvironment();
     auto diffGenSig = diffGenEnv
         ? diffGenEnv->getGenericSignature()->getCanonicalSignature()
         : nullptr;
     auto &differential = getDifferential();
     auto *origEntry = original->getEntryBlock();
     for (auto &origBB : *original) {
       auto *diffBB = differential.createBasicBlock();
       diffBBMap.insert({&origBB, diffBB});
       {
         Lowering::GenericContextScope genericContextScope(
             context.getTypeConverter(), diffGenSig);
         auto diffStructLoweredType = remapSILTypeInDifferential(
             differentialInfo.getLinearMapStructLoweredType(&origBB));

         // If the BB is the original entry, then the differential block that we
         // just created must be the differential function's entry. Create
         // differential entry arguments and continue.
         if (&origBB == origEntry) {
           assert(diffBB->isEntry());
           createEntryArguments(&differential);
           auto *lastArg = diffBB->getArguments().back();
           assert(lastArg->getType() == diffStructLoweredType);
           differentialStructArguments[&origBB] = lastArg;
         }
       }

       LLVM_DEBUG({
         auto &s = getADDebugStream()
                   << "Original bb" + std::to_string(origBB.getDebugID())
                   << ": To differentiate or not to differentiate?\n";
         for (auto &inst : origBB) {
           s << (differentialInfo.shouldDifferentiateInstruction(&inst)
                     ? "[∂] " : "[ ] ")
             << inst;
         }
       });
     }

     assert(diffBBMap.size() == 1 &&
            "Can only currently handle single basic block functions");

     // The differential function has type:
     // (arg0', ..., argn', entry_df_struct) -> result'.
     auto diffParamArgs =
         differential.getArgumentsWithoutIndirectResults().drop_back();
     assert(diffParamArgs.size() ==
            attr->getIndices().parameters->getNumIndices());
     auto origParamArgs = original->getArgumentsWithoutIndirectResults();

     // TODO(TF-788): Re-enable non-varied result warning.
     /*
     // Check if result is not varied.
     SmallVector<SILValue, 8> origFormalResults;
     collectAllFormalResultsInTypeOrder(*original, origFormalResults);
     auto origResult = origFormalResults[getIndices().source];
     // Emit warning if original result is not varied, because it will always
     // have a zero derivative.
     if (!activityInfo.isVaried(origResult, getIndices().parameters)) {
       // Emit fixit if original result has a valid source location.
       auto startLoc = origResult.getLoc().getStartSourceLoc();
       auto endLoc = origResult.getLoc().getEndSourceLoc();
       if (startLoc.isValid() && endLoc.isValid()) {
         context.diagnose(startLoc, diag::autodiff_nonvaried_result_fixit)
             .fixItInsert(startLoc, "withoutDerivative(at:")
             .fixItInsertAfter(endLoc, ")");
       }
     }
     */

     // Initialize tangent mapping for parameters.
     auto diffParamsIt = getIndices().parameters->begin();
     for (auto index : range(diffParamArgs.size())) {
       auto *diffArg = diffParamArgs[index];
       auto *origArg = origParamArgs[*diffParamsIt];
       diffParamsIt++;
       if (diffArg->getType().isAddress()) {
         setTangentBuffer(origEntry, origArg, diffArg);
       } else {
         setTangentValue(
             origEntry, origArg, makeConcreteTangentValue(diffArg));
       }
       LLVM_DEBUG(getADDebugStream()
                  << "Assigned parameter " << *diffArg
                  << " as the tangent of original result " << *origArg);
     }

     // Initialize tangent mapping for indirect results.
     auto origIndResults = original->getIndirectResults();
     auto diffIndResults = differential.getIndirectResults();
     assert(origIndResults.size() == diffIndResults.size());

     for (auto &origBB : *original)
       for (auto i : indices(diffIndResults))
         setTangentBuffer(&origBB, origIndResults[i], diffIndResults[i]);
   }

 public:
   /// Construct an emitter that clones `original` into `jvp` (via the
   /// `TypeSubstCloner` base) and generates the associated differential.
   ///
   /// Besides storing its arguments, construction computes the activity info
   /// for `attr`'s indices, builds the differential linear map info, and
   /// creates the empty differential function (see `createEmptyDifferential`)
   /// that the differential builders are positioned on.
   ///
   /// NOTE(review): `createEmptyDifferential` is handed `&differentialInfo`, so
   /// this presumably relies on `differentialInfo` being declared (and thus
   /// initialized) before `differentialBuilder` — confirm against the member
   /// declaration order.
   explicit JVPEmitter(ADContext &context, SILFunction *original,
                       SILDifferentiableAttr *attr, SILFunction *jvp,
                       DifferentiationInvoker invoker)
       : TypeSubstCloner(*jvp, *original, getSubstitutionMap(original, jvp)),
         context(context), original(original), attr(attr), jvp(jvp),
         invoker(invoker), activityInfo(getActivityInfo(
                               context, original, attr->getIndices(), jvp)),
         differentialInfo(context, AutoDiffLinearMapKind::Differential, original,
                          jvp, attr->getIndices(), activityInfo),
         differentialBuilder(SILBuilder(*createEmptyDifferential(
             context, original, attr, &differentialInfo))),
         diffLocalAllocBuilder(getDifferential()) {
     // The empty differential was created in the initializer list above;
     // record it in the context's list of generated functions.
     context.getGeneratedFunctions().push_back(&getDifferential());
   }

   /// Create the (still body-less) differential function for `original`.
   ///
   /// The differential takes the tangents of the wrt parameters followed by the
   /// linear map struct of the original entry block, and returns a value in the
   /// tangent space of the original result selected by the source index of
   /// `attr`.
   static SILFunction *createEmptyDifferential(ADContext &context,
                                               SILFunction *original,
                                               SILDifferentiableAttr *attr,
                                               LinearMapInfo *linearMapInfo) {
     auto &module = context.getModule();
     auto origFnTy = original->getLoweredFunctionType();
     auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());

     // Keep the original function's generic signature pushed on `module.Types`
     // for the rest of this function (RAII), so that type lowering performed
     // below knows the original function's generic parameter types.
     Lowering::GenericContextScope genericContextScope(
         module.Types, origFnTy->getGenericSignature());

     auto indices = attr->getIndices();

     // Maps a type to the canonical type of its tangent space.
     auto getTangentType = [&](CanType type) {
       return type->getAutoDiffAssociatedTangentSpace(lookupConformance)
           ->getCanonicalType();
     };

     // Differential result: the tangent of the original source result, using
     // the original result's convention.
     SmallVector<SILResultInfo, 8> dfResults;
     auto origResult = origFnTy->getResults()[indices.source];
     dfResults.push_back(SILResultInfo(getTangentType(origResult.getType()),
                                       origResult.getConvention()));

     // Differential parameters: the tangents of the requested wrt parameters,
     // each using the convention of its original parameter.
     SmallVector<SILParameterInfo, 8> dfParams;
     auto origParams = origFnTy->getParameters();
     for (auto paramIndex : indices.parameters->getIndices()) {
       auto origParam = origParams[paramIndex];
       dfParams.push_back(SILParameterInfo(
           getTangentType(origParam.getType()), origParam.getConvention()));
     }

     // Trailing parameter: the differential struct of the original entry block.
     // It acts as the closure context of the returned differential.
     auto *dfStruct =
         linearMapInfo->getLinearMapStruct(original->getEntryBlock());
     dfParams.push_back(
         {dfStruct->getDeclaredInterfaceType()->getCanonicalType(),
          ParameterConvention::Direct_Owned});

     // Mangle the differential's name from the original name and the indices.
     Mangle::ASTMangler mangler;
     auto diffName = original->getASTContext().getIdentifier(
         mangler.mangleAutoDiffLinearMapHelper(
             original->getName(), AutoDiffLinearMapKind::Differential,
             indices)).str();

     // Assemble the differential's function type under the derivative generic
     // signature, inheriting the remaining properties from the original type.
     auto diffGenericSig = getDerivativeGenericSignature(attr, original);
     auto *diffGenericEnv =
         diffGenericSig ? diffGenericSig->getGenericEnvironment() : nullptr;
     auto diffType = SILFunctionType::get(
         diffGenericSig, origFnTy->getExtInfo(), origFnTy->getCoroutineKind(),
         origFnTy->getCalleeConvention(), dfParams, {}, dfResults, None,
         original->getASTContext());

     // Generated differentials are never called cross-module, hence the hidden
     // linkage.
     SILOptFunctionBuilder fb(context.getTransform());
     auto *differential = fb.createFunction(
         SILLinkage::Hidden, diffName, diffType, diffGenericEnv,
         original->getLocation(), original->isBare(), IsNotTransparent,
         original->isSerialized(), original->isDynamicallyReplaceable());
     differential->setDebugScope(
         new (module) SILDebugScope(original->getLocation(), differential));

     return differential;
   }

   /// Run JVP generation. Returns true on error.
   bool run() {
     LLVM_DEBUG(getADDebugStream()
                << "Cloning original @" << original->getName()
                << " to jvp @" << jvp->getName() << '\n');
     // Create JVP and differential entry and arguments.
     auto *entry = jvp->createBasicBlock();
     createEntryArguments(jvp);
     prepareForDifferentialGeneration();
     // Clone.
     SmallVector<SILValue, 4> entryArgs(entry->getArguments().begin(),
                                        entry->getArguments().end());
     cloneFunctionBody(original, entry, entryArgs);
     emitReturnInstForDifferential();
     // If errors occurred, back out.
     if (errorOccurred)
       return true;
     LLVM_DEBUG(getADDebugStream() << "Generated JVP for "
                << original->getName() << ":\n" << *jvp);
     LLVM_DEBUG(getADDebugStream() << "Generated differential for "
                << original->getName() << ":\n" << getDifferential());
     return errorOccurred;
   }

   /// Forward to the base cloner's post-processing, unless an error has already
   /// been reported, in which case nothing is done.
   void postProcess(SILInstruction *orig, SILInstruction *cloned) {
     if (!errorOccurred)
       SILClonerWithScopes::postProcess(orig, cloned);
   }

   /// Remap an original basic block to the block recorded for it in `BBMap`.
   SILBasicBlock *remapBasicBlock(SILBasicBlock *bb) {
     return BBMap[bb];
   }

   /// General visitor for all instructions. If any error is emitted by previous
   /// visits, bail out.
   ///
   /// Instructions that should be differentiated are additionally traced in the
   /// debug output, together with the instructions that visiting them emitted
   /// in the differential.
   void visit(SILInstruction *inst) {
     if (errorOccurred)
       return;
     if (!differentialInfo.shouldDifferentiateInstruction(inst)) {
       TypeSubstCloner::visit(inst);
       return;
     }
     LLVM_DEBUG(getADDebugStream() << "JVPEmitter visited:\n[ORIG]" << *inst);
 #ifndef NDEBUG
     // Bind the differential builder by reference: copying it would be wasted
     // work and would make the dump below read a stale insertion point. Both
     // variables are only needed for the debug dump.
     auto &diffBuilder = getDifferentialBuilder();
     auto beforeInsertion = std::prev(diffBuilder.getInsertionPoint());
 #endif
     TypeSubstCloner::visit(inst);
     LLVM_DEBUG({
       auto &s = llvm::dbgs() << "[TAN] Emitted in differential:\n";
       auto afterInsertion = diffBuilder.getInsertionPoint();
       // Everything between the two insertion points was emitted by this visit.
       for (auto it = ++beforeInsertion; it != afterInsertion; ++it)
         s << *it;
     });
   }

   /// Diagnose `inst` as not differentiable and flag the error.
   /// NOTE(review): presumably the catch-all reached for instruction kinds
   /// without a more specific visitor in this class — confirm against the
   /// visitor base class.
   void visitSILInstruction(SILInstruction *inst) {
     context.emitNondifferentiabilityError(inst, invoker,
         diag::autodiff_expression_not_differentiable_note);
     // Once set, `visit` and `postProcess` bail out early and `run` reports
     // failure.
     errorOccurred = true;
   }

   /// Visit all instructions of the original block `bb`. Before cloning them,
   /// position the differential builder at the corresponding differential block
   /// and destructure that block's differential struct argument so that its
   /// elements are available while visiting.
   void visitInstructionsInBlock(SILBasicBlock *bb) {
     auto *diffBB = diffBBMap.lookup(bb);
     auto &diffBuilder = getDifferentialBuilder();
     diffBuilder.setInsertionPoint(diffBB);
     // The differential struct is the last argument of the differential block.
     auto *diffStructArg = diffBB->getArguments().back();
     auto *destructure = diffBuilder.createDestructureStruct(
         getDifferential().getLocation(), diffStructArg);
     initializeDifferentialStructElements(bb, destructure->getResults());
     TypeSubstCloner::visitInstructionsInBlock(bb);
   }

   /// If an `apply` has active results or active inout parameters, replace it
   /// with an `apply` of its JVP.
   ///
   /// The JVP call yields the original results followed by a differential. The
   /// original results are mapped to the cloned `apply`, the differential is
   /// recorded (reabstracted through a thunk if its type does not match the
   /// lowered type of the linear map struct field) and the tangent code for the
   /// `apply` is emitted in the differential. On any diagnosed error,
   /// `errorOccurred` is set and the visit is abandoned.
   void visitApplyInst(ApplyInst *ai) {
     // If the function should not be differentiated or it's the array literal
     // initialization intrinsic, just do standard cloning.
     if (!differentialInfo.shouldDifferentiateApplyInst(ai) ||
         isArrayLiteralIntrinsic(ai)) {
       LLVM_DEBUG(getADDebugStream() << "No active results:\n" << *ai << '\n');
       TypeSubstCloner::visitApplyInst(ai);
       return;
     }

     // Check and reject functions with active inout arguments. It's not yet
     // supported.
     auto paramInfos = ai->getSubstCalleeConv().getParameters();
     auto paramArgs = ai->getArgumentsWithoutIndirectResults();
     for (unsigned i : swift::indices(paramInfos)) {
       if (paramInfos[i].isIndirectInOut() &&
           activityInfo.isActive(paramArgs[i], getIndices())) {
         context.emitNondifferentiabilityError(ai, invoker,
             diag::autodiff_cannot_differentiate_through_inout_arguments);
         errorOccurred = true;
         return;
       }
     }

     LLVM_DEBUG(getADDebugStream() << "JVP-transforming:\n" << *ai << '\n');

     // Get the minimal parameter and result indices required for differentiating
     // this `apply`.
     SmallVector<SILValue, 4> allResults;
     SmallVector<unsigned, 8> activeParamIndices;
     SmallVector<unsigned, 8> activeResultIndices;
     collectMinimalIndicesForFunctionCall(ai, getIndices(), activityInfo,
                                          allResults, activeParamIndices,
                                          activeResultIndices);
     assert(!activeParamIndices.empty() && "Parameter indices cannot be empty");
     assert(!activeResultIndices.empty() && "Result indices cannot be empty");
     LLVM_DEBUG(auto &s = getADDebugStream() << "Active indices: params={";
                interleave(activeParamIndices.begin(), activeParamIndices.end(),
                           [&s](unsigned i) { s << i; }, [&s] { s << ", "; });
                s << "}, results={"; interleave(
                    activeResultIndices.begin(), activeResultIndices.end(),
                    [&s](unsigned i) { s << i; }, [&s] { s << ", "; });
                s << "}\n";);
     // FIXME: We don't support multiple active results yet.
     if (activeResultIndices.size() > 1) {
       context.emitNondifferentiabilityError(
           ai, invoker, diag::autodiff_expression_not_differentiable_note);
       errorOccurred = true;
       return;
     }
     // Form expected indices, assuming there's only one result.
     SILAutoDiffIndices indices(
         activeResultIndices.front(),
         IndexSubset::get(
             getASTContext(), ai->getArgumentsWithoutIndirectResults().size(),
             activeParamIndices));

     // Emit the JVP.
     auto loc = ai->getLoc();
     auto &builder = getBuilder();
     // Note: this local shadows the `original` function member for the rest of
     // this method; it is the remapped callee value.
     auto original = getOpValue(ai->getCallee());
     SILValue jvpValue;
     // If the callee is a `@differentiable` function, just extract its JVP.
     auto originalFnTy = original->getType().castTo<SILFunctionType>();
     if (originalFnTy->isDifferentiable()) {
       // Every parameter we differentiate with respect to must be one of the
       // callee's differentiation parameters.
       auto paramIndices = originalFnTy->getDifferentiationParameterIndices();
       for (auto i : indices.parameters->getIndices()) {
         if (!paramIndices->contains(i)) {
           context.emitNondifferentiabilityError(original, invoker,
               diag::autodiff_function_nondiff_parameter_not_differentiable);
           errorOccurred = true;
           return;
         }
       }
       // NOTE(review): unlike the `differentiable_function` path below, this
       // borrow is not explicitly ended here — confirm whether an end-borrow
       // is required when the callee is not already guaranteed.
       auto borrowedDiffFunc = builder.emitBeginBorrowOperation(loc, original);
       jvpValue = builder.createDifferentiableFunctionExtract(
           loc, NormalDifferentiableFunctionTypeComponent::JVP,
           borrowedDiffFunc);
       jvpValue = builder.emitCopyValueOperation(loc, jvpValue);
     }

     // If JVP has not yet been found, emit a `differentiable_function`
     // instruction on the remapped original function operand and
     // a `differentiable_function_extract` instruction to get the JVP.
     // The `differentiable_function` instruction will be canonicalized during
     // the transform main loop.
     if (!jvpValue) {
       // FIXME: Handle indirect differentiation invokers. This may require some
       // redesign: currently, each original function + attribute pair is mapped
       // only to one invoker.
       /*
        DifferentiationInvoker indirect(ai, attr);
        auto insertion =
            context.getInvokers().try_emplace({this->original, attr}, indirect);
        auto &invoker = insertion.first->getSecond();
        invoker = indirect;
        */

       // If the original `apply` instruction has a substitution map, then the
       // applied function is specialized.
       // In the JVP, specialization is also necessary for parity. The original
       // function operand is specialized with a remapped version of same
       // substitution map using an argument-less `partial_apply`.
       if (ai->getSubstitutionMap().empty()) {
         original = builder.emitCopyValueOperation(loc, original);
       } else {
         auto substMap = getOpSubstitutionMap(ai->getSubstitutionMap());
         auto jvpPartialApply = getBuilder().createPartialApply(
             ai->getLoc(), original, substMap, {},
             ParameterConvention::Direct_Guaranteed);
         original = jvpPartialApply;
       }

       // Check and diagnose non-differentiable original function type.
       // Returns true (after emitting a diagnostic) if `origFnTy` has a
       // non-differentiable wrt parameter or source result.
       auto diagnoseNondifferentiableOriginalFunctionType =
           [&](CanSILFunctionType origFnTy) {
             // Check and diagnose non-differentiable arguments.
             for (unsigned paramIndex : range(origFnTy->getNumParameters())) {
               if (indices.isWrtParameter(paramIndex) &&
                       !origFnTy->getParameters()[paramIndex]
                       .getSILStorageType()
                       .isDifferentiable(getModule())) {
                 context.emitNondifferentiabilityError(
                     ai->getArgumentsWithoutIndirectResults()[paramIndex], invoker,
                     diag::autodiff_nondifferentiable_argument);
                 errorOccurred = true;
                 return true;
               }
             }
             // Check and diagnose non-differentiable results.
             if (!origFnTy->getResults()[indices.source]
                     .getSILStorageType()
                     .isDifferentiable(getModule())) {
               context.emitNondifferentiabilityError(
                   original, invoker, diag::autodiff_nondifferentiable_result);
               errorOccurred = true;
               return true;
             }
             return false;
           };
       if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
         return;

       auto *diffFuncInst = context.createDifferentiableFunction(
           builder, loc, indices.parameters, original);

       // Record the `differentiable_function` instruction.
       context.getDifferentiableFunctionInsts().push_back(diffFuncInst);
       // TODO(TF-689): Make `differentiable_function` store result indices and
       // remove `ADContext::resultIndices`.
       context.getResultIndices()[diffFuncInst] = activeResultIndices.front();

       // Extract a copy of the JVP, then release the `differentiable_function`
       // value itself.
       auto borrowedADFunc =
           builder.emitBeginBorrowOperation(loc, diffFuncInst);
       auto extractedJVP = builder.createDifferentiableFunctionExtract(
           loc, NormalDifferentiableFunctionTypeComponent::JVP,
           borrowedADFunc);
       jvpValue = builder.emitCopyValueOperation(loc, extractedJVP);
       builder.emitEndBorrowOperation(loc, borrowedADFunc);
       builder.emitDestroyValueOperation(loc, diffFuncInst);
     }

     // Call the JVP using the original parameters.
     SmallVector<SILValue, 8> jvpArgs;
     auto jvpFnTy = getOpType(jvpValue->getType()).castTo<SILFunctionType>();
     auto numJVPArgs =
         jvpFnTy->getNumParameters() + jvpFnTy->getNumIndirectFormalResults();
     jvpArgs.reserve(numJVPArgs);
     // Collect substituted arguments.
     for (auto origArg : ai->getArguments())
       jvpArgs.push_back(getOpValue(origArg));
     assert(jvpArgs.size() == numJVPArgs);
     // Apply the JVP.
     // The JVP should be specialized, so no substitution map is necessary.
     auto *jvpCall = getBuilder().createApply(loc, jvpValue, SubstitutionMap(),
                                              jvpArgs, ai->isNonThrowing());
     LLVM_DEBUG(getADDebugStream() << "Applied jvp function\n" << *jvpCall);

     // Release the copied JVP function value.
     builder.emitDestroyValueOperation(loc, jvpValue);

     // Get the JVP results (original results and differential). The
     // differential is the last direct result.
     SmallVector<SILValue, 8> jvpDirectResults;
     extractAllElements(jvpCall, builder, jvpDirectResults);
     auto originalDirectResults =
         ArrayRef<SILValue>(jvpDirectResults).drop_back(1);
     auto originalDirectResult =
         joinElements(originalDirectResults, getBuilder(), jvpCall->getLoc());

     mapValue(ai, originalDirectResult);

     // Some instructions that produce the callee may have been cloned.
     // If the original callee did not have any users beyond this `apply`,
     // recursively kill the cloned callee.
     // `dyn_cast_or_null` (rather than `cast_or_null`) so that a callee defined
     // by something other than a single-value instruction is skipped instead
     // of tripping the cast assertion.
     if (auto *origCallee = dyn_cast_or_null<SingleValueInstruction>(
             ai->getCallee()->getDefiningInstruction()))
       if (origCallee->hasOneUse())
         recursivelyDeleteTriviallyDeadInstructions(
             getOpValue(origCallee)->getDefiningInstruction());

     // Add the differential function for when we create the struct we partially
     // apply to the differential we are generating.
     auto differential = jvpDirectResults.back();
     auto *differentialDecl = differentialInfo.lookUpLinearMapDecl(ai);
     auto originalDifferentialType =
         getOpType(differential->getType()).getAs<SILFunctionType>();
     auto differentialType =
         remapType(differential->getType())
             .castTo<SILFunctionType>();
     auto jvpGenSig = SubsMap.getGenericSignature()
         ? SubsMap.getGenericSignature()->getCanonicalSignature()
         : nullptr;
     Lowering::GenericContextScope genericContextScope(
         context.getTypeConverter(), jvpGenSig);
     auto loweredDifferentialType =
         getOpType(context.getTypeConverter().getLoweredType(
             differentialDecl->getInterfaceType()->getCanonicalType(),
             ResilienceExpansion::Minimal))
             .castTo<SILFunctionType>();
     // If actual differential type does not match lowered differential type,
     // reabstract the differential using a thunk.
     if (!loweredDifferentialType->isEqual(originalDifferentialType)) {
       SILOptFunctionBuilder fb(context.getTransform());
       auto *thunk = getOrCreateReabstractionThunk(
           fb, context.getModule(), loc, &getDifferential(),
           differentialType, loweredDifferentialType);
       auto *thunkRef = builder.createFunctionRef(loc, thunk);
       differential = builder.createPartialApply(
           loc, thunkRef,
           getOpSubstitutionMap(thunk->getForwardingSubstitutionMap()),
           {differential}, differentialType->getCalleeConvention());
     }
     differentialValues[ai->getParent()].push_back(differential);

     // Differential emission.
     emitTangentForApplyInst(ai, indices, originalDifferentialType);
   }

   /// Clone a `return`: the JVP returns the original result(s) followed by the
   /// differential function partially applied to the differential struct value
   /// built for this `return`.
   void visitReturnInst(ReturnInst *ri) {
     auto loc = ri->getOperand().getLoc();
     auto &builder = getBuilder();
     auto *diffStructVal = buildDifferentialValueStructValue(ri);

     // Look up the JVP value of the original function's return value and split
     // it into its elements.
     auto *origRetInst = cast<ReturnInst>(ri->getParent()->getTerminator());
     SmallVector<SILValue, 8> origResults;
     extractAllElements(getOpValue(origRetInst->getOperand()), builder,
                        origResults);

     // Pick the substitutions to apply the differential with: the forwarding
     // substitutions of the JVP's generic environment if it has one, otherwise
     // those of the JVP itself.
     SubstitutionMap jvpSubstMap;
     if (auto *jvpGenericEnv = jvp->getGenericEnvironment())
       jvpSubstMap = jvpGenericEnv->getForwardingSubstitutionMap();
     else
       jvpSubstMap = jvp->getForwardingSubstitutionMap();

     // Reference the differential and close over the differential struct.
     auto *differentialRef =
         builder.createFunctionRef(loc, &getDifferential());
     auto *differentialPartialApply = builder.createPartialApply(
         loc, differentialRef, jvpSubstMap, {diffStructVal},
         ParameterConvention::Direct_Guaranteed);

     // Return a tuple of the original result(s) and the differential.
     SmallVector<SILValue, 8> directResults(origResults.begin(),
                                            origResults.end());
     directResults.push_back(differentialPartialApply);
     builder.createReturn(
         ri->getLoc(), joinElements(directResults, builder, loc));
   }

   /// `br` is not supported by JVP generation; reaching this visitor aborts
   /// via `llvm_unreachable`. `prepareForDifferentialGeneration` asserts that
   /// the original function has a single basic block.
   void visitBranchInst(BranchInst *bi) {
     llvm_unreachable("Unsupported SIL instruction.");
   }

   /// `cond_br` is not supported by JVP generation; reaching this visitor
   /// aborts via `llvm_unreachable`. `prepareForDifferentialGeneration` asserts
   /// that the original function has a single basic block.
   void visitCondBranchInst(CondBranchInst *cbi) {
     llvm_unreachable("Unsupported SIL instruction.");
   }

   /// `switch_enum` is not supported by JVP generation; reaching this visitor
   /// aborts via `llvm_unreachable`. `prepareForDifferentialGeneration` asserts
   /// that the original function has a single basic block.
   void visitSwitchEnumInst(SwitchEnumInst *sei) {
     llvm_unreachable("Unsupported SIL instruction.");
   }

   /// Clone a `differentiable_function` instruction from the original into the
   /// JVP and append the clone to the context's `differentiable_function`
   /// worklist.
   void visitDifferentiableFunctionInst(DifferentiableFunctionInst *dfi) {
     TypeSubstCloner::visitDifferentiableFunctionInst(dfi);
     auto *clonedDFI = cast<DifferentiableFunctionInst>(getOpValue(dfi));
     context.getDifferentiableFunctionInsts().push_back(clonedDFI);
   }
 };
 } // end anonymous namespace

 //===----------------------------------------------------------------------===//
 // PullbackEmitter - visitors on the original function for pullback code
 // generation
 //===----------------------------------------------------------------------===//

 namespace {
 class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {
 private:
   /// The parent VJP emitter.
   VJPEmitter &vjpEmitter;

   /// Dominance info for the original function.
   DominanceInfo *domInfo = nullptr;

   /// Post-dominance info for the original function.
   PostDominanceInfo *postDomInfo = nullptr;

   /// Post-order info for the original function.
   PostOrderFunctionInfo *postOrderInfo = nullptr;

   /// Mapping from original basic blocks to corresponding pullback basic blocks.
   /// Pullback basic blocks always have the predecessor as the single argument.
   DenseMap<SILBasicBlock *, SILBasicBlock *> pullbackBBMap;

   /// Mapping from original basic blocks and original values to corresponding
   /// adjoint values.
   DenseMap<std::pair<SILBasicBlock *, SILValue>, AdjointValue> valueMap;

   /// Mapping from original basic blocks and original buffers to corresponding
   /// adjoint buffers.
   DenseMap<std::pair<SILBasicBlock *, SILValue>, SILValue> bufferMap;

   /// Mapping from pullback basic blocks to pullback struct arguments.
   DenseMap<SILBasicBlock *, SILArgument *> pullbackStructArguments;

   /// Mapping from pullback struct field declarations to pullback struct
   /// elements destructured from the linear map basic block argument. In the
   /// beginning of each pullback basic block, the block's pullback struct is
   /// destructured into individual elements stored here.
   DenseMap<VarDecl *, SILValue> pullbackStructElements;

   /// Mapping from original basic blocks and successor basic blocks to
   /// corresponding pullback trampoline basic blocks. Trampoline basic blocks
   /// take additional arguments in addition to the predecessor enum argument.
   DenseMap<std::pair<SILBasicBlock *, SILBasicBlock *>, SILBasicBlock *>
       pullbackTrampolineBBMap;

   /// Mapping from original basic blocks to dominated active values.
   DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> activeValues;

   /// Mapping from original basic blocks and original active values to
   /// corresponding pullback block arguments.
   DenseMap<std::pair<SILBasicBlock *, SILValue>, SILArgument *>
       activeValuePullbackBBArgumentMap;

   /// Mapping from original basic blocks to local temporary values to be cleaned
   /// up. This is populated when pullback emission is run on one basic block and
   /// cleaned before processing another basic block.
   DenseMap<SILBasicBlock *, SmallVector<SILValue, 64>>
       blockTemporaries;

   /// Set of the temporaries recorded by `recordTemporary` that have not yet
   /// been erased by `cleanUpTemporariesForBlock`. `recordTemporary` uses it to
   /// assert that a temporary is not recorded twice.
   llvm::DenseSet<SILValue> blockTemporarySet;

   /// The main builder.
   SILBuilder builder;

   /// An auxiliary local allocation builder.
   SILBuilder localAllocBuilder;

   /// Stack buffers allocated for storing local adjoint values.
   SmallVector<SILValue, 64> functionLocalAllocations;

   /// A set used to remember local allocations that were destroyed.
   llvm::SmallDenseSet<SILValue> destroyedLocalAllocations;

   /// The seed argument in the pullback function.
   SILArgument *seed = nullptr;

   // NOTE(review): presumably the arena backing adjoint value allocations —
   // no use is visible in this part of the file; confirm.
   llvm::BumpPtrAllocator allocator;

   // NOTE(review): presumably set once pullback emission has diagnosed an
   // error — confirm against the code that writes it.
   bool errorOccurred = false;

   // Convenience accessors. Apart from `getModule` and `getASTContext`, which
   // go through `getContext()` and `getPullback()` respectively, each one
   // simply forwards to the corresponding member or getter of the parent
   // `vjpEmitter`.
   ADContext &getContext() const { return vjpEmitter.context; }
   SILModule &getModule() const { return getContext().getModule(); }
   ASTContext &getASTContext() const { return getPullback().getASTContext(); }
   SILFunction &getOriginal() const { return *vjpEmitter.original; }
   SILFunction &getPullback() const { return *vjpEmitter.pullback; }
   SILDifferentiableAttr *getAttr() const { return vjpEmitter.attr; }
   DifferentiationInvoker getInvoker() const { return vjpEmitter.invoker; }
   LinearMapInfo &getPullbackInfo() { return vjpEmitter.pullbackInfo; }
   const SILAutoDiffIndices &getIndices() const {
     return vjpEmitter.getIndices();
   }
   const DifferentiableActivityInfo &getActivityInfo() const {
     return vjpEmitter.activityInfo;
   }

 public:
   /// Construct a pullback emitter on top of `vjpEmitter`. Both builders are
   /// set up on the pullback function, and the dominance, post-dominance and
   /// post-order info of the original function are fetched from the pass
   /// manager.
   explicit PullbackEmitter(VJPEmitter &vjpEmitter)
       : vjpEmitter(vjpEmitter), builder(getPullback()),
         localAllocBuilder(getPullback()) {
     // Fetch the analyses first, then query each for the original function.
     auto &pm = getContext().getPassManager();
     auto *dominance = pm.getAnalysis<DominanceAnalysis>();
     auto *postDominance = pm.getAnalysis<PostDominanceAnalysis>();
     auto *postOrder = pm.getAnalysis<PostOrderAnalysis>();
     auto *origFn = vjpEmitter.original;
     domInfo = dominance->get(origFn);
     postDomInfo = postDominance->get(origFn);
     postOrderInfo = postOrder->get(origFn);
   }

 private:
   //--------------------------------------------------------------------------//
   // Pullback struct mapping
   //--------------------------------------------------------------------------//

   /// Record `values` as the elements of the pullback struct of `origBB`, one
   /// per stored property of the struct and in the same order. Each field may
   /// only be recorded once.
   void initializePullbackStructElements(SILBasicBlock *origBB,
                                         SILInstructionResultArray values) {
     auto *pbStructDecl = getPullbackInfo().getLinearMapStruct(origBB);
     auto fields = pbStructDecl->getStoredProperties();
     assert(fields.size() == values.size() &&
            "The number of pullback struct fields must equal the number of "
            "pullback struct element values");
     for (auto fieldAndValue : llvm::zip(fields, values)) {
       VarDecl *field = std::get<0>(fieldAndValue);
       SILValue element = std::get<1>(fieldAndValue);
       assert(
           element.getOwnershipKind() != ValueOwnershipKind::Guaranteed
               && "Pullback struct elements must be @owned");
       bool isNewField = pullbackStructElements.insert({field, element}).second;
       (void)isNewField;
       assert(isNewField && "A pullback struct element already exists!");
     }
   }

   /// Return the element recorded for `field`, which must be a field of the
   /// pullback struct of `origBB` and must have been recorded by
   /// `initializePullbackStructElements`.
   SILValue getPullbackStructElement(SILBasicBlock *origBB, VarDecl *field) {
     // The field has to belong to the pullback struct of `origBB`.
     assert(getPullbackInfo().getLinearMapStruct(origBB) ==
                cast<StructDecl>(field->getDeclContext()));
     assert(pullbackStructElements.count(field) != 0 &&
            "Pullback struct element for this field does not exist!");
     SILValue element = pullbackStructElements.lookup(field);
     return element;
   }

   //--------------------------------------------------------------------------//
   // Adjoint value factory methods
   //--------------------------------------------------------------------------//

   AdjointValue makeZeroAdjointValue(SILType type);

   AdjointValue makeConcreteAdjointValue(SILValue value);

   template<typename EltRange>
   AdjointValue makeAggregateAdjointValue(SILType type, EltRange elements);

   //--------------------------------------------------------------------------//
   // Temporary value management
   //--------------------------------------------------------------------------//

   /// Record the object-typed temporary `value` under its parent block so that
   /// `cleanUpTemporariesForBlock` can destroy it later. A value must not be
   /// recorded twice. Returns `value` unchanged.
   SILValue recordTemporary(SILValue value) {
     assert(value->getType().isObject());
     auto &temporaries = blockTemporaries[value->getParentBlock()];
     temporaries.push_back(value);
     LLVM_DEBUG(getADDebugStream() << "Recorded temporary " << value);
     bool isNewTemporary = blockTemporarySet.insert(value).second;
     (void)isNewTemporary;
     assert(isNewTemporary && "Temporary already recorded?");
     return value;
   }

   /// Emits a destroy operation at the current builder position for every
   /// temporary recorded for `bb`, and removes each one from the set of live
   /// temporaries.
   void cleanUpTemporariesForBlock(SILBasicBlock *bb, SILLocation loc) {
     LLVM_DEBUG(getADDebugStream() << "Cleaning up temporaries for bb"
                << bb->getDebugID() << '\n');
     auto &temporaries = blockTemporaries[bb];
     for (auto temporary : temporaries) {
       builder.emitDestroyValueOperation(loc, temporary);
       blockTemporarySet.erase(temporary);
     }
   }

   //--------------------------------------------------------------------------//
   // Symbolic value materializers
   //--------------------------------------------------------------------------//

   /// Materialize an adjoint value. The type of the given adjoint value must be
   /// loadable.
   SILValue materializeAdjointDirect(AdjointValue val, SILLocation loc);

   /// Materialize an adjoint value indirectly to a SIL buffer.
   void materializeAdjointIndirect(AdjointValue val, SILValue destBuffer,
                                   SILLocation loc);

   //--------------------------------------------------------------------------//
   // Helpers for symbolic value materializers
   //--------------------------------------------------------------------------//

   /// Emit a zero value by calling `AdditiveArithmetic.zero`. The given type
   /// must conform to `AdditiveArithmetic`.
   void emitZeroIndirect(CanType type, SILValue bufferAccess, SILLocation loc);

   /// Emit a zero value by calling `AdditiveArithmetic.zero`. The given type
   /// must conform to `AdditiveArithmetic` and be loadable in SIL.
   SILValue emitZeroDirect(CanType type, SILLocation loc);

   //--------------------------------------------------------------------------//
   // Accumulator
   //--------------------------------------------------------------------------//

   /// Materialize an adjoint value in the most efficient way.
   SILValue materializeAdjoint(AdjointValue val, SILLocation loc);

   /// Given two adjoint values, accumulate them.
   AdjointValue accumulateAdjointsDirect(AdjointValue lhs, AdjointValue rhs,
                                         SILLocation loc);

   /// Given two materialized adjoint values, accumulate them. These two
   /// adjoints must be objects of loadable type.
   SILValue accumulateDirect(SILValue lhs, SILValue rhs, SILLocation loc);

   /// Given two materialized adjoint values, accumulate them using
   /// `AdditiveArithmetic.+`, depending on the differentiation mode.
   void accumulateIndirect(SILValue resultBufAccess,
                           SILValue lhsBufAccess, SILValue rhsBufAccess,
                           SILLocation loc);

   /// Given two buffers of an `AdditiveArithmetic` type, accumulate the right
   /// hand side into the left hand side using `+=`.
   void accumulateIndirect(SILValue lhsDestAccess, SILValue rhsAccess,
                           SILLocation loc);

   //--------------------------------------------------------------------------//
   // Type transformer
   //--------------------------------------------------------------------------//

   /// Maps the given type into the pullback function's context. A type that
   /// contains archetypes is first mapped out of its context.
   SILType remapType(SILType ty) {
     auto &pullback = getPullback();
     SILType typeToMap = ty.hasArchetype() ? ty.mapTypeOutOfContext() : ty;
     return pullback.mapTypeIntoContext(typeToMap);
   }

   /// Returns the tangent space associated with the given type, looking up
   /// conformances in the Swift module of the current SIL module. The result
   /// is empty if the type has no associated tangent space.
   Optional<VectorSpace> getTangentSpace(CanType type) {
     auto *swiftModule = getModule().getSwiftModule();
     return type->getAutoDiffAssociatedTangentSpace(
         LookUpConformanceInModule(swiftModule));
   }

   /// Returns the tangent space type of the given type after remapping it into
   /// the pullback's context. The result keeps the value category (object or
   /// address) of the input type. The remapped type must have a tangent space.
   SILType getRemappedTangentType(SILType type) {
     auto remappedASTType = remapType(type).getASTType();
     auto tangentSpace = getTangentSpace(remappedASTType);
     return SILType::getPrimitiveType(tangentSpace->getCanonicalType(),
                                      type.getCategory());
   }

   /// Applies the pullback function's forwarding substitution map to the given
   /// substitution map and returns the result.
   SubstitutionMap remapSubstitutionMap(SubstitutionMap substMap) {
     auto forwardingSubstMap = getPullback().getForwardingSubstitutionMap();
     return substMap.subst(forwardingSubstMap);
   }

   //--------------------------------------------------------------------------//
   // Managed value mapping
   //--------------------------------------------------------------------------//

   /// Returns true if an adjoint value is registered in `valueMap` for the
   /// given object-typed original value in the given original block.
   bool hasAdjointValue(SILBasicBlock *origBB, SILValue originalValue) const {
     assert(origBB->getParent() == &getOriginal());
     assert(originalValue->getType().isObject());
     return valueMap.count({origBB, originalValue}) != 0;
   }

   /// Sets the adjoint value of an original value in the given original block.
   /// If an adjoint value is already registered for it, that value is replaced.
   ///
   /// Both the original value and the adjoint value must have object type, and
   /// the adjoint value's type must be the remapped tangent type of the
   /// original value's type.
   void setAdjointValue(SILBasicBlock *origBB, SILValue originalValue,
                        AdjointValue adjointValue) {
     LLVM_DEBUG(getADDebugStream() << "Setting adjoint value for "
                                   << originalValue);
     assert(origBB->getParent() == &getOriginal());
     assert(originalValue->getType().isObject());
     assert(adjointValue.getType().isObject());
     assert(originalValue->getFunction() == &getOriginal());
     // The adjoint value must be in the tangent space.
     assert(adjointValue.getType() ==
                getRemappedTangentType(originalValue->getType()));
     auto insertion = valueMap.try_emplace({origBB, originalValue},
                                           adjointValue);
     // A fresh entry already holds `adjointValue`; nothing is being replaced.
     if (insertion.second)
       return;
     // Only report a replacement when an entry actually existed beforehand.
     LLVM_DEBUG(getADDebugStream()
                    << "The existing adjoint value will be replaced: "
                    << insertion.first->getSecond());
     insertion.first->getSecond() = adjointValue;
   }

   /// Get the adjoint for an original value. The given value must be in the
   /// original function and must have object type.
   ///
   /// This method first tries to find an entry in `valueMap`. If an adjoint
   /// doesn't exist, a zero adjoint of the remapped tangent type is created,
   /// registered in `valueMap`, and returned.
   AdjointValue getAdjointValue(SILBasicBlock *origBB, SILValue originalValue) {
     assert(origBB->getParent() == &getOriginal());
     assert(originalValue->getType().isObject());
     assert(originalValue->getFunction() == &getOriginal());
     // Look up first so that a zero adjoint is only built when it is needed:
     // arguments passed to `try_emplace` are evaluated even when the key is
     // already present.
     auto existing = valueMap.find({origBB, originalValue});
     if (existing != valueMap.end())
       return existing->getSecond();
     auto zeroAdjoint = makeZeroAdjointValue(
         getRemappedTangentType(originalValue->getType()));
     valueMap.try_emplace({origBB, originalValue}, zeroAdjoint);
     return zeroAdjoint;
   }

   /// Contributes `newAdjointValue` to the adjoint of the given original value.
   /// If no adjoint is registered yet, the new value becomes the adjoint;
   /// otherwise it is accumulated with the existing adjoint and the result is
   /// registered in its place.
   void addAdjointValue(SILBasicBlock *origBB, SILValue originalValue,
                        AdjointValue newAdjointValue, SILLocation loc) {
     assert(origBB->getParent() == &getOriginal());
     assert(originalValue->getType().isObject());
     assert(newAdjointValue.getType().isObject());
     assert(originalValue->getFunction() == &getOriginal());
     LLVM_DEBUG(getADDebugStream() << "Adding adjoint for " << originalValue);
     // The adjoint value must be in the tangent space.
     assert(newAdjointValue.getType() ==
                getRemappedTangentType(originalValue->getType()));
     auto insertion =
         valueMap.try_emplace({origBB, originalValue}, newAdjointValue);
     if (!insertion.second) {
       // An adjoint already exists: remove it from the map, accumulate the
       // new contribution onto it, and register the accumulated value.
       auto entry = insertion.first;
       auto previousAdjoint = entry->getSecond();
       valueMap.erase(entry);
       setAdjointValue(
           origBB, originalValue,
           accumulateAdjointsDirect(previousAdjoint, newAdjointValue, loc));
     }
   }

   /// Returns the pullback block argument registered for the given active
   /// value in the given original block. The argument must exist and must
   /// belong to the pullback block corresponding to `origBB`.
   SILArgument *getActiveValuePullbackBlockArgument(SILBasicBlock *origBB,
                                                    SILValue activeValue) {
     assert(origBB->getParent() == &getOriginal());
     SILArgument *adjointArg =
         activeValuePullbackBBArgumentMap[{origBB, activeValue}];
     assert(adjointArg);
     assert(adjointArg->getParent() == getPullbackBlock(origBB));
     return adjointArg;
   }

   //--------------------------------------------------------------------------//
   // Buffer mapping
   //--------------------------------------------------------------------------//

   /// Registers `adjointBuffer` as the adjoint buffer of the address-typed
   /// `originalBuffer` in the given original block. No adjoint buffer may have
   /// been registered for that pair before.
   void setAdjointBuffer(SILBasicBlock *origBB,
                         SILValue originalBuffer,
                         SILValue adjointBuffer) {
     assert(originalBuffer->getType().isAddress());
     bool inserted =
         bufferMap.try_emplace({origBB, originalBuffer}, adjointBuffer).second;
     (void)inserted;
     assert(inserted);
   }

   /// If `originalProjection` is a `struct_element_addr`, `tuple_element_addr`,
   /// or `begin_access`, returns the corresponding projection into (or the
   /// adjoint buffer of) its operand's adjoint buffer. Returns a null
   /// `SILValue` for any other value, and for `begin_access` when an error has
   /// occurred.
   SILValue getAdjointProjection(SILBasicBlock *origBB,
                                 SILValue originalProjection) {
     // `struct_element_addr`: project the field with the same name out of the
     // operand's adjoint buffer.
     if (auto *structElt = dyn_cast<StructElementAddrInst>(originalProjection)) {
       SILValue adjBase = getAdjointBuffer(origBB, structElt->getOperand());
       auto *adjStructDecl = adjBase->getType().getStructOrBoundGenericStruct();
       auto candidates =
           adjStructDecl->lookupDirect(structElt->getField()->getName());
       assert(candidates.size() == 1);
       auto *adjField = cast<VarDecl>(candidates.front());
       return builder.createStructElementAddr(
           structElt->getLoc(), adjBase, adjField);
     }
     // `tuple_element_addr`: project the matching element out of the operand's
     // adjoint buffer.
     if (auto *tupleElt = dyn_cast<TupleElementAddrInst>(originalProjection)) {
       SILValue origTuple = tupleElt->getOperand();
       SILValue adjTuple = getAdjointBuffer(origBB, origTuple);
       // If the adjoint buffer is not of tuple type, it is returned as is.
       if (!adjTuple->getType().is<TupleType>())
         return adjTuple;
       auto origTupleTy = origTuple->getType().castTo<TupleType>();
       // The adjoint element index is the number of preceding original tuple
       // elements that have a tangent space.
       unsigned adjIndex = 0;
       for (unsigned i = 0, fieldNo = tupleElt->getFieldNo(); i < fieldNo; ++i) {
         auto eltType = origTupleTy->getElement(i).getType()->getCanonicalType();
         if (getTangentSpace(eltType))
           ++adjIndex;
       }
       return builder.createTupleElementAddr(
           tupleElt->getLoc(), adjTuple, adjIndex);
     }
     // `begin_access`: reuse the adjoint buffer of the accessed base, unless an
     // error has occurred, in which case a null value is registered.
     if (auto *beginAccess = dyn_cast<BeginAccessInst>(originalProjection)) {
       SILValue adjBase = getAdjointBuffer(origBB, beginAccess->getOperand());
       if (!errorOccurred)
         return adjBase;
       return (bufferMap[{origBB, originalProjection}] = SILValue());
     }
     // Not a handled projection.
     return SILValue();
   }

   /// Returns the position in the pullback entry block at which the next
   /// function-local allocation should be inserted.
   SILBasicBlock::iterator getNextFunctionLocalAllocationInsertionPoint() {
     if (!functionLocalAllocations.empty()) {
       // Insert before the most recently registered local allocation.
       // Inserting before rather than after ensures that allocation and zero
       // initialization instructions are grouped together.
       auto newestAlloc = functionLocalAllocations.back();
       return newestAlloc->getDefiningInstruction()->getIterator();
     }
     // No local allocations yet: insert at the pullback entry start.
     return getPullback().getEntryBlock()->begin();
   }

   /// Returns the adjoint buffer registered in `bufferMap` for the given
   /// address-typed original buffer in the given original block, creating and
   /// registering one if none exists yet:
   /// - If the original buffer is a projection handled by
   ///   `getAdjointProjection`, the adjoint is the corresponding projection.
   /// - Otherwise, a new zero-initialized `alloc_stack` is emitted in the
   ///   pullback entry block and recorded in `functionLocalAllocations`.
   ///
   /// The returned reference points into `bufferMap`.
   SILValue &getAdjointBuffer(SILBasicBlock *origBB, SILValue originalBuffer) {
     assert(originalBuffer->getType().isAddress());
     assert(originalBuffer->getFunction() == &getOriginal());
     auto insertion = bufferMap.try_emplace({origBB, originalBuffer},
                                            SILValue());
     if (!insertion.second) // not inserted
       return insertion.first->getSecond();

     // NOTE: `insertion.first` must not be used past this point.
     // `getAdjointProjection` may call back into `getAdjointBuffer`, which can
     // insert into `bufferMap` and thereby invalidate the iterator. All
     // accesses below therefore look the entry up again.

     // If the original buffer is a projection, return a corresponding projection
     // into the adjoint buffer.
     if (auto adjProj = getAdjointProjection(origBB, originalBuffer))
       return (bufferMap[{origBB, originalBuffer}] = adjProj);

     // Set insertion point for local allocation builder: before the last local
     // allocation, or at the start of the pullback function's entry if no local
     // allocations exist yet.
     localAllocBuilder.setInsertionPoint(
         getPullback().getEntryBlock(),
         getNextFunctionLocalAllocationInsertionPoint());
     // Allocate local buffer and initialize to zero.
     auto bufObjectType = getRemappedTangentType(originalBuffer->getType());
     auto *newBuf = localAllocBuilder.createAllocStack(
         RegularLocation::getAutoGeneratedLocation(), bufObjectType);
     // Temporarily change global builder insertion point and emit zero into the
     // local buffer.
     auto insertionPoint = builder.getInsertionBB();
     builder.setInsertionPoint(
         localAllocBuilder.getInsertionBB(),
         localAllocBuilder.getInsertionPoint());
     emitZeroIndirect(bufObjectType.getASTType(), newBuf, newBuf->getLoc());
     builder.setInsertionPoint(insertionPoint);
     // Register the local buffer.
     functionLocalAllocations.push_back(newBuf);
     // Re-lookup the entry rather than reusing the possibly stale iterator.
     return (bufferMap[{origBB, originalBuffer}] = newBuf);
   }

   /// Accumulates the contents of `rhsBufferAccess` (an address in the
   /// pullback) into the adjoint buffer of `originalBuffer` (an address in the
   /// original function) for the given original block.
   void addToAdjointBuffer(SILBasicBlock *origBB, SILValue originalBuffer,
                           SILValue rhsBufferAccess, SILLocation loc) {
     assert(originalBuffer->getType().isAddress() &&
            rhsBufferAccess->getType().isAddress());
     assert(originalBuffer->getFunction() == &getOriginal());
     assert(rhsBufferAccess->getFunction() == &getPullback());
     SILValue lhsAdjointBuffer = getAdjointBuffer(origBB, originalBuffer);
     accumulateIndirect(lhsAdjointBuffer, rhsBufferAccess, loc);
   }

   //--------------------------------------------------------------------------//
   // CFG mapping
   //--------------------------------------------------------------------------//

   /// Returns the pullback block registered in `pullbackBBMap` for the given
   /// original block.
   SILBasicBlock *getPullbackBlock(SILBasicBlock *originalBlock) {
     SILBasicBlock *pullbackBB = pullbackBBMap.lookup(originalBlock);
     return pullbackBB;
   }

   /// Returns the pullback trampoline block registered in
   /// `pullbackTrampolineBBMap` for the edge from `originalBlock` to
   /// `successorBlock`.
   SILBasicBlock *getPullbackTrampolineBlock(
       SILBasicBlock *originalBlock, SILBasicBlock *successorBlock) {
     SILBasicBlock *trampolineBB =
         pullbackTrampolineBBMap.lookup({originalBlock, successorBlock});
     return trampolineBB;
   }

 public:
   //--------------------------------------------------------------------------//
   // Entry point
   //--------------------------------------------------------------------------//

   /// Performs pullback generation on the empty pullback function. Returns true
   /// if any error occurs.
   ///
   /// Generation proceeds in phases:
   /// 1. If the original result is non-varied, emit zero derivatives and stop.
   /// 2. Collect the active values of each original block, in dominance order.
   /// 3. Create a pullback block (plus arguments and trampoline blocks) for
   ///    each original block, in post-order post-dominance order.
   /// 4. Assign the seed as the adjoint of the original result.
   /// 5. Visit each original block and perform differentiation in the
   ///    corresponding pullback block.
   /// 6. Emit the pullback exit: collect parameter adjoints, clean up
   ///    temporaries and local allocations, and return.
   bool run() {
     auto &original = getOriginal();
     auto &pullback = getPullback();
     auto pbLoc = getPullback().getLocation();
     LLVM_DEBUG(getADDebugStream() << "Running PullbackEmitter on\n"
                                   << original);

     // Construct a generic context scope on the type converter using the
     // pullback's canonical generic signature, or null if the pullback has no
     // generic environment.
     auto *pbGenEnv = getPullback().getGenericEnvironment();
     auto pbGenSig = pbGenEnv
         ? pbGenEnv->getGenericSignature()->getCanonicalSignature()
         : nullptr;
     Lowering::GenericContextScope genericContextScope(
         getContext().getTypeConverter(), pbGenSig);
     auto origExitIt = original.findReturnBB();
     assert(origExitIt != original.end() &&
            "Functions without returns must have been diagnosed");
     auto *origExit = &*origExitIt;

     // The result being differentiated is the formal result selected by the
     // source index.
     SmallVector<SILValue, 8> origFormalResults;
     collectAllFormalResultsInTypeOrder(original, origFormalResults);
     auto origResult = origFormalResults[getIndices().source];

     // If original result is non-varied, it will always have a zero derivative.
     // Skip full pullback generation and simply emit zero derivatives for wrt
     // parameters.
     //
     // NOTE(TF-876): This shortcut is currently necessary for functions
     // returning non-varied result with >1 basic block where some basic blocks
     // have no dominated active values; control flow differentiation does not
     // handle this case. See TF-876 for context.
     if (!getActivityInfo().isVaried(origResult, getIndices().parameters)) {
       emitZeroDerivativesForNonvariedResult(origResult);
       return false;
     }

     // Get dominated active values in original blocks.
     // Adjoint values of dominated active values are passed as pullback block
     // arguments.
     DominanceOrder domOrder(original.getEntryBlock(), domInfo);
     while (auto *bb = domOrder.getNext()) {
       auto &bbActiveValues = activeValues[bb];
       // If the current block has an immediate dominator, append the immediate
       // dominator block's active values to the current block's active values.
       if (auto *domNode = domInfo->getNode(bb)->getIDom()) {
         auto &domBBActiveValues = activeValues[domNode->getBlock()];
         bbActiveValues.append(domBBActiveValues.begin(),
                               domBBActiveValues.end());
       }
       // Seed the visited set with the inherited values so that they are not
       // registered a second time for this block.
       SmallPtrSet<SILValue, 8> visited(bbActiveValues.begin(),
                                        bbActiveValues.end());
       // Register a value as active if it has not yet been visited.
       auto addActiveValue = [&](SILValue v) {
         if (visited.count(v))
           return;
         // Diagnose active enum values. Differentiation of enum values is not
         // yet supported; requires special adjoint value handling.
         // Note that diagnosing does not return early: the value still falls
         // through to the registration logic below, and the error is acted
         // upon after the whole block has been scanned.
         if (v->getType().getEnumOrBoundGenericEnum()) {
           getContext().emitNondifferentiabilityError(
               v, getInvoker(), diag::autodiff_enums_unsupported);
           errorOccurred = true;
         }
         // Skip address projections.
         // Address projections do not need their own adjoint buffers; they
         // become projections into their adjoint base buffer.
         if (Projection::isAddressProjection(v))
           return;
         visited.insert(v);
         bbActiveValues.push_back(v);
       };
       // Register bb arguments and all instruction operands/results.
       for (auto *arg : bb->getArguments())
         if (getActivityInfo().isActive(arg, getIndices()))
           addActiveValue(arg);
       for (auto &inst : *bb) {
         for (auto op : inst.getOperandValues())
           if (getActivityInfo().isActive(op, getIndices()))
             addActiveValue(op);
         for (auto result : inst.getResults())
           if (getActivityInfo().isActive(result, getIndices()))
             addActiveValue(result);
       }
       domOrder.pushChildren(bb);
       // Back out once an error has been recorded while scanning this block.
       if (errorOccurred)
         return true;
     }

     // Create pullback blocks and arguments, visiting original blocks in
     // post-order post-dominance order.
     SmallVector<SILBasicBlock *, 8> postOrderPostDomOrder;
     // Start from the root node, which may have a marker `nullptr` block if
     // there are multiple roots.
     PostOrderPostDominanceOrder postDomOrder(postDomInfo->getRootNode(),
                                              postOrderInfo, original.size());
     while (auto *origNode = postDomOrder.getNext()) {
       auto *origBB = origNode->getBlock();
       postDomOrder.pushChildren(origNode);
       // If node is the `nullptr` marker basic block, do not push it.
       if (!origBB)
         continue;
       postOrderPostDomOrder.push_back(origBB);
     }
     for (auto *origBB : postOrderPostDomOrder) {
       auto *pullbackBB = pullback.createBasicBlock();
       pullbackBBMap.insert({origBB, pullbackBB});
       auto pbStructLoweredType =
           remapType(getPullbackInfo().getLinearMapStructLoweredType(origBB));
       // If the BB is the original exit, then the pullback block that we just
       // created must be the pullback function's entry. For the pullback entry,
       // create entry arguments and continue to the next block.
       if (origBB == origExit) {
         assert(pullbackBB->isEntry());
         createEntryArguments(&pullback);
         // The pullback struct is the last entry argument.
         auto *mainPullbackStruct = pullbackBB->getArguments().back();
         assert(mainPullbackStruct->getType() == pbStructLoweredType);
         pullbackStructArguments[origBB] = mainPullbackStruct;
         // Destructure the pullback struct to get the elements.
         builder.setInsertionPoint(pullbackBB);
         auto *dsi = builder.createDestructureStruct(pbLoc, mainPullbackStruct);
         initializePullbackStructElements(origBB, dsi->getResults());
         continue;
       }
       // Get all active values in the original block.
       // If the original block has no active values, continue.
       // Such a block gets a pullback block, but no block arguments, no
       // pullback struct destructuring, and no trampoline blocks.
       auto &bbActiveValues = activeValues[origBB];
       if (bbActiveValues.empty())
         continue;
       // Otherwise, if the original block has active values:
       // - For each active buffer in the original block, allocate a new local
       //   buffer in the pullback entry. (All adjoint buffers are allocated in
       //   the pullback entry and deallocated in the pullback exit.)
       // - For each active value in the original block, add adjoint value
       //   arguments to the pullback block.
       for (auto activeValue : bbActiveValues) {
         if (activeValue->getType().isAddress()) {
           // Allocate and zero initialize a new local buffer using
           // `getAdjointBuffer`.
           builder.setInsertionPoint(pullback.getEntryBlock());
           getAdjointBuffer(origBB, activeValue);
         } else {
           // Create and register pullback block argument for the active value.
           auto *pullbackArg = pullbackBB->createPhiArgument(
               getRemappedTangentType(activeValue->getType()),
               ValueOwnershipKind::Owned);
           activeValuePullbackBBArgumentMap[{origBB, activeValue}] = pullbackArg;
           // The argument is owned; record it as a temporary of `pullbackBB`.
           recordTemporary(pullbackArg);
         }
       }
       // Add a pullback struct argument.
       auto *pbStructArg = pullbackBB->createPhiArgument(
           pbStructLoweredType, ValueOwnershipKind::Owned);
       pullbackStructArguments[origBB] = pbStructArg;
       // Destructure the pullback struct to get the elements.
       builder.setInsertionPoint(pullbackBB);
       auto *dsi = builder.createDestructureStruct(pbLoc, pbStructArg);
       initializePullbackStructElements(origBB, dsi->getResults());

       // - Create pullback trampoline blocks for each successor block of the
       //   original block. Pullback trampoline blocks only have a pullback
       //   struct argument. They branch from a pullback successor block to the
       //   pullback original block, passing adjoint values of active values.
       for (auto *succBB : origBB->getSuccessorBlocks()) {
         auto *pullbackTrampolineBB =
             pullback.createBasicBlockBefore(pullbackBB);
         pullbackTrampolineBBMap.insert({{origBB, succBB},
                                        pullbackTrampolineBB});
         // Get the enum element type (i.e. the pullback struct type). The enum
         // element type may be boxed if the enum is indirect.
         auto enumLoweredTy =
             getPullbackInfo().getBranchingTraceEnumLoweredType(succBB);
         auto *enumEltDecl =
             getPullbackInfo().lookUpBranchingTraceEnumElement(origBB, succBB);
         auto enumEltType = remapType(
             enumLoweredTy.getEnumElementType(enumEltDecl, getModule()));
         pullbackTrampolineBB->createPhiArgument(enumEltType,
                                                 ValueOwnershipKind::Owned);
       }
     }

     auto *pullbackEntry = pullback.getEntryBlock();
     // The pullback function has type (seed, exit_pbs) -> ([arg0], ..., [argn]).
     auto pbParamArgs = pullback.getArgumentsWithoutIndirectResults();
     assert(pbParamArgs.size() == 2);
     seed = pbParamArgs[0];

     // Assign adjoint for original result.
     builder.setInsertionPoint(
         pullbackEntry, getNextFunctionLocalAllocationInsertionPoint());
     if (seed->getType().isAddress()) {
       // Indirect seed: copy it (without taking) into a fresh local buffer and
       // use the copy as the adjoint buffer of the original result.
       auto *seedBufCopy = builder.createAllocStack(pbLoc, seed->getType());
       builder.createCopyAddr(pbLoc, seed, seedBufCopy, IsNotTake,
                              IsInitialization);
       setAdjointBuffer(origExit, origResult, seedBufCopy);
       // Register the copy so that it is cleaned up with the other local
       // allocations at the pullback exit.
       functionLocalAllocations.push_back(seedBufCopy);
       LLVM_DEBUG(getADDebugStream()
                  << "Assigned seed buffer " << seedBufCopy
                  << " as the adjoint of original indirect result "
                  << origResult);
     } else {
       setAdjointValue(origExit, origResult, makeConcreteAdjointValue(seed));
       LLVM_DEBUG(getADDebugStream()
                  << "Assigned seed " << *seed
                  << " as the adjoint of original result " << origResult);
     }

     // Visit original blocks in post-order and perform differentiation
     // in corresponding pullback blocks. If errors occurred, back out.
     for (auto *bb : postOrderPostDomOrder) {
       visitSILBasicBlock(bb);
       if (errorOccurred)
         return true;
     }

     // Prepare and emit a `return` in the pullback exit block.
     // The pullback exit is the pullback block of the original entry block.
     auto *origEntry = getOriginal().getEntryBlock();
     auto *pbExit = getPullbackBlock(origEntry);
     builder.setInsertionPoint(pbExit);

     // This vector will contain all the materialized return elements.
     SmallVector<SILValue, 8> retElts;
     // This vector will contain all indirect parameter adjoint buffers.
     SmallVector<SILValue, 4> indParamAdjoints;

     auto origParams = getOriginal().getArgumentsWithoutIndirectResults();

     // Materializes the return element corresponding to the parameter
     // `parameterIndex` into the `retElts` vector.
     // Object-typed parameters contribute a copy of their materialized adjoint
     // to `retElts`; address-typed parameters contribute their adjoint buffer
     // to `indParamAdjoints` instead.
     auto addRetElt = [&](unsigned parameterIndex) -> void {
       auto origParam = origParams[parameterIndex];
       if (origParam->getType().isObject()) {
         auto pbVal = getAdjointValue(origEntry, origParam);
         auto val = materializeAdjointDirect(pbVal, pbLoc);
         auto newVal = builder.emitCopyValueOperation(pbLoc, val);
         retElts.push_back(newVal);
       } else {
         auto adjBuf = getAdjointBuffer(origEntry, origParam);
         indParamAdjoints.push_back(adjBuf);
       }
     };
     // Collect differentiation parameter adjoints.
     for (auto i : getIndices().parameters->getIndices())
       addRetElt(i);

     // Copy them to adjoint indirect results.
     assert(indParamAdjoints.size() ==
                getPullback().getIndirectResults().size() &&
            "Indirect parameter adjoint count mismatch");
     for (auto pair : zip(indParamAdjoints,
                              getPullback().getIndirectResults())) {
       auto source = std::get<0>(pair);
       auto *dest = std::get<1>(pair);
       builder.createCopyAddr(pbLoc, source, dest, IsTake, IsInitialization);
       // Prevent source buffer from being deallocated, since the underlying
       // value is moved.
       // NOTE(review): marking the source as destroyed skips only the
       // destroy below; the `dealloc_stack` is still emitted for every local
       // allocation.
       destroyedLocalAllocations.insert(source);
     }

     // Emit cleanups for all local values.
     cleanUpTemporariesForBlock(pbExit, pbLoc);
     // Deallocate local allocations.
     // NOTE(review): allocations are iterated in registration order. Since
     // each new allocation is inserted before the previously registered one,
     // this is presumably the reverse of their instruction order -- confirm
     // against `getNextFunctionLocalAllocationInsertionPoint`.
     for (auto alloc : functionLocalAllocations) {
       // Assert that local allocations have at least one use.
       // Buffers should not be allocated needlessly.
       assert(!alloc->use_empty());
       if (!destroyedLocalAllocations.count(alloc)) {
         builder.emitDestroyAddrAndFold(pbLoc, alloc);
         destroyedLocalAllocations.insert(alloc);
       }
       builder.createDeallocStack(pbLoc, alloc);
     }
     builder.createReturn(pbLoc, joinElements(retElts, builder, pbLoc));

 #ifndef NDEBUG
     // Debug-only leak check: every recorded temporary must have been removed
     // from `blockTemporarySet`, and every local allocation must be marked as
     // destroyed.
     bool leakFound = false;
     // Ensure all temporaries have been cleaned up.
     for (auto &bb : pullback) {
       for (auto temp : blockTemporaries[&bb]) {
         if (blockTemporarySet.count(temp)) {
           leakFound = true;
           getADDebugStream() << "Found leaked temporary:\n" << temp;
         }
       }
     }
     // Ensure all local allocations have been cleaned up.
     for (auto localAlloc : functionLocalAllocations) {
       if (!destroyedLocalAllocations.count(localAlloc)) {
         leakFound = true;
         getADDebugStream() << "Found leaked local buffer:\n" << localAlloc;
       }
     }
     assert(!leakFound && "Leaks found!");
 #endif

     LLVM_DEBUG(getADDebugStream() << "Generated pullback for "
                                   << original.getName() << ":\n" << pullback);
     return errorOccurred;
   }

   /// Emits the whole pullback body for an original function whose result is
   /// non-varied. Since the derivative is always zero in that case, the
   /// pullback consists of a single block that destroys its owned arguments
   /// and returns a zero value for every pullback result.
   void emitZeroDerivativesForNonvariedResult(SILValue origNonvariedResult) {
     auto &pb = getPullback();
     auto loc = getPullback().getLocation();
     /*
     // TODO(TF-788): Re-enable non-varied result warning.
     // Emit fixit if original non-varied result has a valid source location.
     auto startLoc = origNonvariedResult.getLoc().getStartSourceLoc();
     auto endLoc = origNonvariedResult.getLoc().getEndSourceLoc();
     if (startLoc.isValid() && endLoc.isValid()) {
       getContext().diagnose(startLoc, diag::autodiff_nonvaried_result_fixit)
           .fixItInsert(startLoc, "withoutDerivative(at:")
           .fixItInsertAfter(endLoc, ")");
     }
     */
     LLVM_DEBUG(getADDebugStream() << getOriginal().getName()
                                   << " has non-varied result, returning zero"
                                      " for all pullback results\n");
     // Create the single pullback block and its entry arguments.
     auto *entryBB = pb.createBasicBlock();
     createEntryArguments(&pb);
     builder.setInsertionPoint(entryBB);
     // Owned arguments are not otherwise used here, so destroy them.
     for (auto *entryArg : entryBB->getArguments()) {
       if (entryArg->getOwnershipKind() != ValueOwnershipKind::Owned)
         continue;
       builder.emitDestroyOperation(loc, entryArg);
     }
     // Emit a zero for each result: direct results are collected and returned,
     // indirect results are initialized in place, in order.
     SmallVector<SILValue, 4> zeroDirectResults;
     auto nextIndirectResult = pb.getIndirectResults().begin();
     for (auto resultInfo : pb.getLoweredFunctionType()->getResults()) {
       auto zeroType =
           pb.mapTypeIntoContext(resultInfo.getType())->getCanonicalType();
       if (!resultInfo.isFormalDirect()) {
         emitZeroIndirect(zeroType, *nextIndirectResult, loc);
         ++nextIndirectResult;
         continue;
       }
       zeroDirectResults.push_back(emitZeroDirect(zeroType, loc));
     }
     builder.createReturn(loc, joinElements(zeroDirectResults, builder, loc));
     LLVM_DEBUG(getADDebugStream() << "Generated pullback for "
                                   << getOriginal().getName() << ":\n"
                                   << pb);
   }

   /// A small set of basic blocks, used by `buildPullbackSuccessor` to record
   /// the pullback trampoline blocks associated with a materialized adjoint
   /// value.
   using TrampolineBlockSet = SmallPtrSet<SILBasicBlock *, 4>;

   /// Determine the pullback successor block for a given original block and one
   /// of its predecessors. When a trampoline block is necessary, emit code into
   /// the trampoline block to trampoline the original block's active value's
   /// adjoint values.
   ///
   /// \param origBB the original block whose pullback block is being terminated.
   /// \param origPredBB one predecessor of `origBB`.
   /// \param pullbackTrampolineBlockMap populated here: maps each forwarded
   ///        concrete adjoint value to the set of trampoline blocks that
   ///        forward it, so that the caller can destroy the value in pullback
   ///        successor blocks that do not use it.
   /// \returns the pullback block of `origPredBB` when no trampoline block
   ///          exists for the (`origPredBB`, `origBB`) pair; otherwise the
   ///          trampoline block, terminated with a branch to that pullback
   ///          block.
   SILBasicBlock *buildPullbackSuccessor(
       SILBasicBlock *origBB, SILBasicBlock *origPredBB,
       SmallDenseMap<SILValue, TrampolineBlockSet> &pullbackTrampolineBlockMap) {
     // Get the pullback block and optional pullback trampoline block of the
     // predecessor block.
     auto *pullbackBB = getPullbackBlock(origPredBB);
     auto *pullbackTrampolineBB = getPullbackTrampolineBlock(origPredBB, origBB);
     // If the predecessor block does not have a corresponding pullback
     // trampoline block, then the pullback successor is the pullback block.
     if (!pullbackTrampolineBB)
       return pullbackBB;

     // Otherwise, the pullback successor is the pullback trampoline block,
     // which branches to the pullback block and propagates adjoint values of
     // active values.
     // The trampoline block's single argument is the predecessor's pullback
     // struct value (possibly boxed, see below).
     assert(pullbackTrampolineBB->getNumArguments() == 1);
     auto loc = origBB->getParent()->getLocation();
     // Arguments of the final branch: one per direct active value, followed by
     // the pullback struct value.
     SmallVector<SILValue, 8> trampolineArguments;
     // Propagate adjoint values/buffers of active values/buffers to
     // predecessor blocks.
     auto &predBBActiveValues = activeValues[origPredBB];
     for (auto activeValue : predBBActiveValues) {
       LLVM_DEBUG(getADDebugStream()
                  << "Propagating active adjoint " << activeValue
                  << " to predecessors' pullback blocks\n");
       if (activeValue->getType().isObject()) {
         // Direct value: materialize its adjoint in `origBB` and forward it as
         // a branch argument.
         auto activeValueAdj = getAdjointValue(origBB, activeValue);
         auto concreteActiveValueAdj =
             materializeAdjointDirect(activeValueAdj, loc);

         // First time this concrete adjoint is seen: copy it and re-register
         // the copy as the adjoint, so the copy is what gets forwarded. Both
         // instructions are emitted with the member `builder`, not into the
         // trampoline block.
         if (!pullbackTrampolineBlockMap.count(concreteActiveValueAdj)) {
           concreteActiveValueAdj =
               builder.emitCopyValueOperation(loc, concreteActiveValueAdj);
           setAdjointValue(origBB, activeValue,
                           makeConcreteAdjointValue(concreteActiveValueAdj));
         }
         // Record that this trampoline block uses the forwarded value.
         auto insertion = pullbackTrampolineBlockMap.try_emplace(
             concreteActiveValueAdj, TrampolineBlockSet());
         auto &blockSet = insertion.first->getSecond();
         blockSet.insert(pullbackTrampolineBB);
         trampolineArguments.push_back(concreteActiveValueAdj);

         // If the pullback block does not yet have a registered adjoint
         // value for the active value, set the adjoint value to the
         // forwarded adjoint value argument.
         // TODO: Hoist this logic out of loop over predecessor blocks to
         // remove the `hasAdjointValue` check.
         if (!hasAdjointValue(origPredBB, activeValue)) {
           auto *pullbackBBArg =
               getActiveValuePullbackBlockArgument(origPredBB, activeValue);
           auto forwardedArgAdj = makeConcreteAdjointValue(pullbackBBArg);
           setAdjointValue(origPredBB, activeValue, forwardedArgAdj);
         }
       } else {
         // Propagate adjoint buffers using `copy_addr`.
         // The copy neither takes the source nor initializes the destination,
         // and is emitted with the member `builder`.
         auto adjBuf = getAdjointBuffer(origBB, activeValue);
         auto predAdjBuf = getAdjointBuffer(origPredBB, activeValue);
         builder.createCopyAddr(
             loc, adjBuf, predAdjBuf, IsNotTake, IsNotInitialization);
       }
     }
     // Propagate pullback struct argument.
     SILBuilder pullbackTrampolineBBBuilder(pullbackTrampolineBB);
     auto *predPBStructVal = pullbackTrampolineBB->getArguments().front();
     auto boxType =
         dyn_cast<SILBoxType>(predPBStructVal->getType().getASTType());
     if (!boxType) {
       // Unboxed pullback struct: forward the argument unchanged.
       trampolineArguments.push_back(predPBStructVal);
     } else {
       // Boxed pullback struct: project the box, load a copy of the payload,
       // destroy the box, and forward the loaded value.
       auto *projectBox = pullbackTrampolineBBBuilder.createProjectBox(
           loc, predPBStructVal, /*index*/ 0);
       auto loaded = pullbackTrampolineBBBuilder.emitLoadValueOperation(
           loc, projectBox, LoadOwnershipQualifier::Copy);
       pullbackTrampolineBBBuilder.emitDestroyValueOperation(loc,
                                                             predPBStructVal);
       trampolineArguments.push_back(loaded);
     }
     // Branch from pullback trampoline block to pullback block.
     pullbackTrampolineBBBuilder.createBranch(loc, pullbackBB,
                                              trampolineArguments);
     return pullbackTrampolineBB;
   }

   /// Emit pullback code in the corresponding pullback block.
   ///
   /// Differentiates the instructions of `bb` in reverse order and then, unless
   /// `bb` is the original entry block, terminates the pullback block with a
   /// `switch_enum` over the predecessor enum value, with one case per
   /// predecessor of `bb`. Returns early, leaving the pullback block
   /// unterminated, as soon as `errorOccurred` is set.
   void visitSILBasicBlock(SILBasicBlock *bb) {
     auto pbLoc = getPullback().getLocation();
     // Get the corresponding pullback basic block.
     auto *pbBB = getPullbackBlock(bb);
     builder.setInsertionPoint(pbBB);

     LLVM_DEBUG({
       auto &s = getADDebugStream()
           << "Original bb" + std::to_string(bb->getDebugID())
           << ": To differentiate or not to differentiate?\n";
       for (auto &inst : reversed(*bb)) {
         s << (getPullbackInfo().shouldDifferentiateInstruction(&inst)
                   ? "[∂] " : "[ ] ")
           << inst;
       }
     });

     // Visit each instruction in reverse order.
     for (auto &inst : reversed(*bb)) {
       if (!getPullbackInfo().shouldDifferentiateInstruction(&inst))
         continue;
       // Differentiate instruction.
       visit(&inst);
       if (errorOccurred)
         return;
     }

     // Emit a branching terminator for the block.
     // If the original block is the original entry, then the pullback block is
     // the pullback exit. This is handled specially in `PullbackEmitter::run()`,
     // so we leave the block non-terminated.
     if (bb->isEntry())
       return;

     // Otherwise, add a `switch_enum` terminator for non-exit
     // pullback blocks.
     // 1. Get the pullback struct pullback block argument.
     // 2. Extract the predecessor enum value from the pullback struct value.
     auto *predEnum = getPullbackInfo().getBranchingTraceDecl(bb);
     auto *predEnumField =
         getPullbackInfo().lookUpLinearMapStructEnumField(bb);
     auto predEnumVal = getPullbackStructElement(bb, predEnumField);

     // Propagate adjoint values from active basic block arguments to
     // predecessor terminator operands.
     for (auto *bbArg : bb->getArguments()) {
       if (!getActivityInfo().isActive(bbArg, getIndices()))
         continue;
       // Get predecessor terminator operands.
       SmallVector<std::pair<SILBasicBlock *, SILValue>, 4> incomingValues;
       bbArg->getSingleTerminatorOperands(incomingValues);
       // Initialize adjoint value of predecessor terminator operands as
       // adjoint value of current block arguments.
       auto bbArgAdj = getAdjointValue(bb, bbArg);
       for (auto &incoming : incomingValues)
         setAdjointValue(incoming.first, incoming.second, bbArgAdj);
     }

     // 3. Build the pullback successor cases for the `switch_enum`
     //    instruction. The pullback successors correspond to the predecessors
     //    of the current block.
     SmallVector<std::pair<EnumElementDecl *, SILBasicBlock *>, 4>
         pullbackSuccessorCases;
     // A map from active values' adjoint values to the trampoline blocks that
     // are using them.
     SmallDenseMap<SILValue, TrampolineBlockSet> pullbackTrampolineBlockMap;
     SmallVector<SILBasicBlock *, 8> pullbackSuccBBs;
     for (auto *predBB : bb->getPredecessorBlocks()) {
       auto *pullbackSuccBB = buildPullbackSuccessor(bb, predBB,
                                                     pullbackTrampolineBlockMap);
       pullbackSuccBBs.push_back(pullbackSuccBB);
       auto *enumEltDecl =
           getPullbackInfo().lookUpBranchingTraceEnumElement(predBB, bb);
       pullbackSuccessorCases.push_back({enumEltDecl, pullbackSuccBB});
     }
     // Values are trampolined by only a subset of pullback successor blocks.
     // Other successors blocks should destroy the value to balance the reference
     // count.
     // Iterate by reference: each entry owns a `TrampolineBlockSet`, which would
     // otherwise be copied on every iteration.
     for (auto &entry : pullbackTrampolineBlockMap) {
       auto value = entry.getFirst();
       // The set of trampoline BBs that are users of `value`.
       auto &userTrampolineBBSet = entry.getSecond();
       // For each pullback successor block that does not trampoline the value,
       // release the value.
       for (auto *pullbackSuccBB : pullbackSuccBBs) {
         if (userTrampolineBBSet.count(pullbackSuccBB))
           continue;
         // Use a dedicated builder positioned at the start of the successor
         // block; its name is distinct so it does not shadow the member
         // `builder`, which is still needed for the terminator below.
         SILBuilder destroyBuilder(pullbackSuccBB->begin());
         destroyBuilder.emitDestroyValueOperation(pbLoc, value);
       }
     }
     // Emit cleanups for all block-local temporaries.
     cleanUpTemporariesForBlock(pbBB, pbLoc);
     // 4. Terminate the pullback block with a `switch_enum` on the predecessor
     //    enum value. There is no default destination, so every enum element
     //    must have a corresponding pullback successor case.
     assert(pullbackSuccessorCases.size() == predEnum->getNumElements());
     builder.createSwitchEnum(
         pbLoc, predEnumVal, /*DefaultBB*/ nullptr, pullbackSuccessorCases);
   }

   /// Dispatch `inst` to its instruction-specific visitor, unless an error has
   /// already been recorded. In debug builds, log the original instruction and
   /// every pullback instruction emitted while visiting it.
   void visit(SILInstruction *inst) {
     if (errorOccurred)
       return;

     LLVM_DEBUG(getADDebugStream()
                << "PullbackEmitter visited:\n[ORIG]" << *inst);
 #ifndef NDEBUG
     // Remember the position just before the insertion point so that the
     // newly emitted range can be recovered after the visit.
     auto lastBeforeVisit = std::prev(builder.getInsertionPoint());
 #endif
     SILInstructionVisitor::visit(inst);
     LLVM_DEBUG({
       auto &out = llvm::dbgs() << "[ADJ] Emitted in pullback:\n";
       auto emitted = std::next(lastBeforeVisit);
       auto emittedEnd = builder.getInsertionPoint();
       while (emitted != emittedEnd) {
         out << *emitted;
         ++emitted;
       }
     });
   }

   /// Fallback for instructions without a dedicated visitor: report the
   /// instruction as non-differentiable and record that an error occurred.
   void visitSILInstruction(SILInstruction *inst) {
     LLVM_DEBUG(getADDebugStream()
                << "Unhandled instruction in PullbackEmitter: " << *inst);
     auto &adContext = getContext();
     adContext.emitNondifferentiabilityError(
         inst, getInvoker(),
         diag::autodiff_expression_not_differentiable_note);
     errorOccurred = true;
   }

   /// Emit a call to `fnRef` that writes element `index` of `adjointArray` into
   /// a freshly allocated stack buffer of type `eltType`.
   ///
   /// The call is applied with a substitution map built from `genericSig`, the
   /// AST type of `eltType`, and that type's `AdditiveArithmetic` and
   /// `Differentiable` conformances (both asserted to exist). All instructions
   /// use the location of `ai`.
   ///
   /// \returns the `alloc_stack` buffer. This function does not deallocate it.
   AllocStackInst *
   emitArrayTangentSubscript(ApplyInst *ai, SILType eltType,
                             SILValue adjointArray, SILValue fnRef,
                             CanGenericSignature genericSig, int index) {
     auto loc = ai->getLoc();
     auto &astCtx = builder.getASTContext();
     auto eltASTType = eltType.getASTType();

     // Build the `Int` index value: a 64-bit builtin literal wrapped in the
     // `Int` struct.
     auto rawIndex = builder.createIntegerLiteral(
         loc, SILType::getBuiltinIntegerType(64, astCtx), index);
     auto swiftIntType = SILType::getPrimitiveObjectType(
         astCtx.getIntDecl()->getDeclaredType()->getCanonicalType());
     auto indexValue = builder.createStruct(loc, swiftIntType, {rawIndex});

     // Buffer receiving the subscript result.
     AllocStackInst *resultBuffer = builder.createAllocStack(loc, eltType);

     // Look up the conformances needed for the substitution map.
     auto *module = getModule().getSwiftModule();
     auto *differentiableProto =
         astCtx.getProtocol(KnownProtocolKind::Differentiable);
     auto differentiableConf =
         module->lookupConformance(eltASTType, differentiableProto);
     assert(differentiableConf.hasValue() &&
            "Missing conformance to `Differentiable`");
     auto *additiveProto =
         astCtx.getProtocol(KnownProtocolKind::AdditiveArithmetic);
     auto additiveConf = module->lookupConformance(eltASTType, additiveProto);
     assert(additiveConf.hasValue() &&
            "Missing conformance to `AdditiveArithmetic`");

     auto substitutions = SubstitutionMap::get(
         genericSig, {eltASTType}, {*additiveConf, *differentiableConf});
     builder.createApply(loc, fnRef, substitutions,
                         {resultBuffer, indexValue, adjointArray});
     return resultBuffer;
   }

   /// Accumulate the tangent element held in `subscriptBuffer` into the adjoint
   /// of the value stored by `si` (direct/loadable case), then deallocate
   /// `subscriptBuffer`.
   ///
   /// The element is loaded with `[take]`, so the buffer is left uninitialized
   /// before the `dealloc_stack`. `eltType` is not used by this function.
   void accumulateArrayTangentSubscriptDirect(ApplyInst *ai, SILType eltType,
                                              StoreInst *si,
                                              AllocStackInst *subscriptBuffer) {
     auto applyLoc = ai->getLoc();
     auto tangentElt = builder.emitLoadValueOperation(
         applyLoc, subscriptBuffer, LoadOwnershipQualifier::Take);
     recordTemporary(tangentElt);
     // When the store's source is a `copy_value`, the `copy_value` is part of
     // array literal initialization. In this case, add the adjoint to the source
     // of the copy directly.
     auto *copiedSrc = dyn_cast<CopyValueInst>(si->getSrc());
     SILValue adjointTarget = copiedSrc ? copiedSrc->getOperand() : si->getSrc();
     addAdjointValue(si->getParent(), adjointTarget,
                     makeConcreteAdjointValue(tangentElt), si->getLoc());
     // NOTE(review): the value was already passed to `recordTemporary` above;
     // confirm that also appending it here does not register it twice.
     blockTemporaries[ai->getParent()].push_back(tangentElt);
     builder.createDeallocStack(applyLoc, subscriptBuffer);
   }

   /// Accumulate the tangent element held in `subscriptBuffer` into the adjoint
   /// buffer of the source of `cai` (indirect/address case), then destroy and
   /// deallocate `subscriptBuffer`.
   void accumulateArrayTangentSubscriptIndirect(
       ApplyInst *ai, CopyAddrInst *cai, AllocStackInst *subscriptBuffer) {
     auto copyLoc = cai->getLoc();
     addToAdjointBuffer(cai->getParent(), cai->getSrc(), subscriptBuffer,
                        copyLoc);
     // The buffer still holds an initialized value here, so destroy it before
     // freeing the stack slot.
     builder.emitDestroyAddrAndFold(copyLoc, subscriptBuffer);
     builder.createDeallocStack(ai->getLoc(), subscriptBuffer);
   }

   /// Handle an array literal initialization `apply` (see the
   /// `isArrayLiteralIntrinsic` check in `visitApplyInst`).
   ///
   /// The `apply` result is destructured into the array and a raw pointer. For
   /// every `store`/`copy_addr` that initializes an element through that
   /// pointer, emit a call to the array adjoint's subscript getter for the
   /// element index and accumulate the result into the adjoint of the stored
   /// value (direct case) or of the copied-from address (indirect case).
   void visitArrayInitialization(ApplyInst *ai) {
     LLVM_DEBUG(getADDebugStream() << "Visiting array initialization:\n" << *ai);
     SILValue adjointArray;
     SILValue fnRef;
     CanGenericSignature genericSig;
     // First pass: find the array adjoint and create a reference to the
     // subscript getter of its type.
     for (auto use : ai->getUses()) {
       auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
       if (!dti) continue;
       // The first tuple field of the return value is the `Array`.
       adjointArray = getAdjointValue(ai->getParent(), dti->getResult(0))
           .getConcreteValue();
       assert(adjointArray && "Array does not have adjoint value");
       auto astType = adjointArray->getType().getASTType();
       auto typeDecl = astType->getStructOrBoundGenericStruct();
       auto subscriptDecl = cast<SubscriptDecl>(typeDecl->lookupDirect(
           DeclBaseName::createSubscript()).front());
       auto subscriptGet = subscriptDecl->getAccessor(AccessorKind::Get);
       SILDeclRef subscriptRef(subscriptGet, SILDeclRef::Kind::Func);
       auto fnBuilder = SILOptFunctionBuilder(getContext().getTransform());
       auto fn = fnBuilder.getOrCreateFunction(
           ai->getLoc(), subscriptRef, NotForDefinition);
       genericSig = fn->getLoweredFunctionType()->getGenericSignature();
       fnRef = builder.createFunctionRef(ai->getLoc(), fn);
     }
     assert(adjointArray && "Array does not have adjoint value");
     assert(genericSig && "No generic signature");
     assert(fnRef && "Could not create `function_ref`");

     // Emit the tangent subscript for element `index` and accumulate it for one
     // element initializer. Instructions other than `store` and `copy_addr` are
     // ignored.
     auto accumulateElement = [&](SILInstruction *eltInit, int index) {
       if (auto *si = dyn_cast<StoreInst>(eltInit)) {
         auto tanType = getRemappedTangentType(si->getSrc()->getType());
         auto *subscriptBuffer = emitArrayTangentSubscript(
             ai, tanType, adjointArray, fnRef, genericSig, index);
         accumulateArrayTangentSubscriptDirect(
             ai, tanType, si, subscriptBuffer);
       } else if (auto *cai = dyn_cast<CopyAddrInst>(eltInit)) {
         auto tanType = getRemappedTangentType(cai->getSrc()->getType());
         auto *subscriptBuffer = emitArrayTangentSubscript(
             ai, tanType, adjointArray, fnRef, genericSig, index);
         accumulateArrayTangentSubscriptIndirect(ai, cai, subscriptBuffer);
       }
     };

     // Two loops because the `tuple_extract` instructions can be reached in
     // either order.
     for (auto use : ai->getUses()) {
       auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
       if (!dti) continue;
       // The second tuple field is the `RawPointer`.
       for (auto ptrUse : dti->getResult(1)->getUses()) {
         // The `RawPointer` passes through a `pointer_to_address`. That
         // instruction's first use is a `store` whose src is useful; its
         // subsequent uses are `index_addr`s whose only use is a useful
         // `store`. In the indirect case, each `store` is instead a
         // `copy_addr`.
         for (auto addrUse : ptrUse->getUser()->getResult(0)->getUses()) {
           auto *addrUser = addrUse->getUser();
           auto *iai = dyn_cast<IndexAddrInst>(addrUser);
           if (!iai) {
             // Initialization of element 0 directly through the base address.
             accumulateElement(addrUser, 0);
             continue;
           }
           for (auto eltUse : iai->getUses()) {
             auto *eltInit = eltUse->getUser();
             if (!isa<StoreInst>(eltInit) && !isa<CopyAddrInst>(eltInit))
               continue;
             // The index is required to be an integer literal; `cast` asserts
             // this instead of dereferencing a null `dyn_cast` result.
             auto *literal = cast<IntegerLiteralInst>(iai->getIndex());
             accumulateElement(eltInit,
                               literal->getValue().getLimitedValue());
           }
         }
       }
     }
   }

   /// Handle `apply` instruction.
   ///
   /// Array literal initialization is delegated to `visitArrayInitialization`.
   /// An `apply` without a `NestedApplyInfo` entry is skipped (asserted to be
   /// inactive). Otherwise the callee pullback stored in the pullback struct is
   /// called with the adjoint of the original result as seed, and each result
   /// of that call is accumulated into the adjoint of the corresponding
   /// original differentiation parameter.
   void visitApplyInst(ApplyInst *ai) {
     assert(getPullbackInfo().shouldDifferentiateApplyInst(ai));
     // Handle array uninitialized allocation intrinsic specially.
     if (isArrayLiteralIntrinsic(ai))
       return visitArrayInitialization(ai);
     // Replace a call to a function with a call to its pullback.
     auto &nestedApplyInfo = getContext().getNestedApplyInfo();
     auto applyInfoLookup = nestedApplyInfo.find(ai);
     // If no `NestedApplyInfo` was found, then this `apply` doesn't need to be
     // differentiated.
     if (applyInfoLookup == nestedApplyInfo.end()) {
       // Must not be active.
       assert(!getActivityInfo().isActive(ai, getIndices()));
       return;
     }
     auto applyInfo = applyInfoLookup->getSecond();

     // Get the pullback.
     auto *field = getPullbackInfo().lookUpLinearMapDecl(ai);
     assert(field);
     auto loc = ai->getLoc();
     auto pullback = getPullbackStructElement(ai->getParent(), field);

     // Get the original result of the `apply` instruction.
     // `args` collects the arguments of the pullback call: indirect result
     // buffers first, then the seed.
     SmallVector<SILValue, 8> args;
     SmallVector<SILValue, 8> origDirectResults;
     forEachApplyDirectResult(ai, [&](SILValue directResult) {
       origDirectResults.push_back(directResult);
     });
     SmallVector<SILValue, 8> origAllResults;
     collectAllActualResultsInTypeOrder(ai, origDirectResults, origAllResults);
     assert(applyInfo.indices.source < origAllResults.size());
     auto origResult = origAllResults[applyInfo.indices.source];
     assert(origResult);
     auto origNumIndRes = ai->getNumIndirectResults();

     auto pullbackType =
         remapType(pullback->getType()).castTo<SILFunctionType>();

     // Get the seed (i.e. adjoint value of the original result).
     SILValue seed;
     auto *bb = ai->getParent();
     if (origResult->getType().isObject()) {
       // Direct result: materialize its adjoint value.
       seed = materializeAdjoint(getAdjointValue(bb, origResult), loc);
     } else {
       // Indirect result: use its adjoint buffer.
       seed = getAdjointBuffer(bb, origResult);
     }

     // Create allocations for pullback indirect results.
     SmallVector<AllocStackInst *, 4> pullbackIndirectResults;
     // When the pullback was reabstracted, allocate according to its original
     // (pre-reabstraction) type.
     auto actualPullbackType = applyInfo.originalPullbackType
         ? *applyInfo.originalPullbackType
         : pullbackType;
     for (auto indRes : actualPullbackType->getIndirectFormalResults()) {
       auto *alloc =
           builder.createAllocStack(loc, remapType(indRes.getSILStorageType()));
       pullbackIndirectResults.push_back(alloc);
       args.push_back(alloc);
     }

     // If callee pullback was reabstracted in VJP, reabstract callee pullback.
     if (applyInfo.originalPullbackType) {
       SILOptFunctionBuilder fb(getContext().getTransform());
       auto *thunk = getOrCreateReabstractionThunk(
           fb, getContext().getModule(), loc, &getPullback(),
           pullbackType, *applyInfo.originalPullbackType);
       auto *thunkRef = builder.createFunctionRef(loc, thunk);
       // Wrap the stored pullback in the thunk; the partial application
       // replaces `pullback` as the callee below.
       pullback = builder.createPartialApply(
           loc, thunkRef,
           remapSubstitutionMap(thunk->getForwardingSubstitutionMap()),
           {pullback}, pullbackType->getCalleeConvention());
     }
     args.push_back(seed);

     // Call the callee pullback.
     auto *pullbackCall = builder.createApply(
         loc, pullback, SubstitutionMap(), args, /*isNonThrowing*/ false);
     // The callee value is destroyed right after the call.
     builder.emitDestroyValueOperation(loc, pullback);

     // Extract all results from `pullbackCall`.
     SmallVector<SILValue, 8> dirResults;
     extractAllElements(pullbackCall, builder, dirResults);
     // Get all results in type-defined order.
     SmallVector<SILValue, 8> allResults;
     collectAllActualResultsInTypeOrder(pullbackCall, dirResults, allResults);
     LLVM_DEBUG({
       auto &s = getADDebugStream();
       s << "All results of the nested pullback call:\n";
       llvm::for_each(allResults, [&](SILValue v) { s << v; });
     });

     // Accumulate adjoints for original differentiation parameters.
     // Pullback results are consumed in order, one per differentiation
     // parameter index.
     auto allResultsIt = allResults.begin();
     for (unsigned i : applyInfo.indices.parameters->getIndices()) {
       // Parameter indices do not count indirect results, so skip over them.
       auto origArg = ai->getArgument(origNumIndRes + i);
       auto tan = *allResultsIt++;
       if (tan->getType().isAddress()) {
         addToAdjointBuffer(bb, origArg, tan, loc);
       } else {
         if (origArg->getType().isAddress()) {
           // Direct tangent for an address argument: spill the tangent to a
           // temporary buffer so it can be accumulated into the adjoint buffer.
           auto *tmpBuf = builder.createAllocStack(loc, tan->getType());
           builder.emitStoreValueOperation(loc, tan, tmpBuf,
                                           StoreOwnershipQualifier::Init);
           addToAdjointBuffer(bb, origArg, tmpBuf, loc);
           builder.emitDestroyAddrAndFold(loc, tmpBuf);
           builder.createDeallocStack(loc, tmpBuf);
         }
         else {
           // Direct tangent for a direct argument: record it as a temporary
           // and accumulate it as a concrete adjoint value.
           recordTemporary(tan);
           addAdjointValue(bb, origArg, makeConcreteAdjointValue(tan), loc);
         }
       }
     }
     // Destroy and deallocate pullback indirect results.
     // Done in reverse allocation order.
     for (auto *alloc : reversed(pullbackIndirectResults)) {
       builder.emitDestroyAddrAndFold(loc, alloc);
       builder.createDeallocStack(loc, alloc);
     }
   }

   /// Handle `struct` instruction.
   ///   Original: y = struct (x0, x1, x2, ...)
   ///    Adjoint: adj[x0] += struct_extract adj[y], #x0
   ///             adj[x1] += struct_extract adj[y], #x1
   ///             adj[x2] += struct_extract adj[y], #x2
   ///             ...
   ///
   /// For a concrete adjoint, the adjoint struct (of the `TangentVector` type)
   /// is destructured and each original field that is not `@noDerivative`
   /// receives the element of its corresponding tangent field. If the tangent
   /// struct has no member with the field's name, a non-differentiability error
   /// is emitted and `errorOccurred` is set.
   void visitStructInst(StructInst *si) {
     auto *bb = si->getParent();
     auto loc = si->getLoc();
     auto *structDecl = si->getStructDecl();
     auto av = getAdjointValue(bb, si);
     switch (av.getKind()) {
     case AdjointValueKind::Zero:
       // NOTE(review): unlike the concrete case below, this does not skip
       // `@noDerivative` fields; confirm that this is intended.
       for (auto *field : structDecl->getStoredProperties()) {
         auto fv = si->getFieldValue(field);
         addAdjointValue(bb, fv,
             makeZeroAdjointValue(getRemappedTangentType(fv->getType())), loc);
       }
       break;
     case AdjointValueKind::Concrete: {
       auto adjStruct = materializeAdjointDirect(std::move(av), loc);
       // Find the struct `TangentVector` type.
       auto structTy = remapType(si->getType()).getASTType();
       auto tangentVectorTy =
           getTangentSpace(structTy)->getType()->getCanonicalType();
       assert(!getModule().Types.getTypeLowering(
                  tangentVectorTy, ResilienceExpansion::Minimal)
                      .isAddressOnly());
       auto *tangentVectorDecl =
           tangentVectorTy->getStructOrBoundGenericStruct();
       assert(tangentVectorDecl);

       // The results of the `destructure_struct` correspond to the stored
       // properties of the tangent struct, not of the original struct.
       auto *dti = builder.createDestructureStruct(loc, adjStruct);
       // Accumulate adjoints for the fields of the `struct` operand.
       for (auto *field : structDecl->getStoredProperties()) {
         if (field->getAttrs().hasAttribute<NoDerivativeAttr>())
           continue;
         // Find the corresponding field in the tangent space.
         VarDecl *tanField = nullptr;
         // If the tangent space is the original struct, then field is the same.
         if (tangentVectorDecl == structDecl)
           tanField = field;
         // Otherwise, look up the field by name.
         else {
           auto tanFieldLookup =
               tangentVectorDecl->lookupDirect(field->getName());
           if (tanFieldLookup.empty()) {
             getContext().emitNondifferentiabilityError(
                 si, getInvoker(),
                 diag::autodiff_stored_property_no_corresponding_tangent,
                 tangentVectorDecl->getNameStr(), field->getNameStr());
             errorOccurred = true;
             return;
           }
           tanField = cast<VarDecl>(tanFieldLookup.front());
         }
         assert(tanField);
         // Index the destructured adjoint by the position of `tanField` among
         // the tangent struct's stored properties. The original field index
         // differs from it whenever the tangent struct has a different layout,
         // e.g. when a preceding original field is `@noDerivative`.
         unsigned tanFieldIndex = 0;
         bool foundTanField = false;
         for (auto *tanProperty : tangentVectorDecl->getStoredProperties()) {
           if (tanProperty == tanField) {
             foundTanField = true;
             break;
           }
           ++tanFieldIndex;
         }
         assert(foundTanField &&
                "Tangent field is not a stored property of the tangent struct");
         (void)foundTanField;
         auto tanElt = dti->getResult(tanFieldIndex);
         addAdjointValue(
             bb, si->getFieldValue(field),
             makeConcreteAdjointValue(tanElt), loc);
       }
       break;
     }
     case AdjointValueKind::Aggregate: {
       // Note: All user-called initializations go through the calls to the
       // initializer, and synthesized initializers only have one level of struct
       // formation which will not result into any aggregate adjoint values.
       llvm_unreachable("Aggregate adjoint values should not occur for `struct` "
                        "instructions");
     }
     }
   }

   /// Handle `struct_extract` instruction.
   ///   Original: y = struct_extract x, #field
   ///    Adjoint: adj[x] += struct (0, ..., #field': adj[y], ..., 0)
   ///                                       ^~~~~~~
   ///                     field in tangent space corresponding to #field
   ///
   /// If the tangent struct has no member with the field's name, a
   /// non-differentiability error is emitted and `errorOccurred` is set.
   void visitStructExtractInst(StructExtractInst *sei) {
     assert(!sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
            "`struct_extract` with `@noDerivative` field should not be "
            "differentiated; activity analysis should not marked as varied");
     auto *bb = sei->getParent();
     auto *origField = sei->getField();

     // Compute the (loadable) tangent struct type of the operand.
     auto structTy = remapType(sei->getOperand()->getType()).getASTType();
     auto tangentVectorTy =
         getTangentSpace(structTy)->getType()->getCanonicalType();
     assert(!getModule().Types.getTypeLowering(
                tangentVectorTy, ResilienceExpansion::Minimal)
                    .isAddressOnly());
     auto tangentVectorSILTy =
         SILType::getPrimitiveObjectType(tangentVectorTy);
     auto *tangentVectorDecl =
         tangentVectorTy->getStructOrBoundGenericStruct();
     assert(tangentVectorDecl);

     // Find the corresponding field in the tangent space. If the tangent space
     // is the original struct, the field is the same; otherwise it is looked up
     // by name.
     VarDecl *tanField = origField;
     if (tangentVectorDecl != sei->getStructDecl()) {
       auto candidates = tangentVectorDecl->lookupDirect(origField->getName());
       if (candidates.empty()) {
         getContext().emitNondifferentiabilityError(
             sei, getInvoker(),
             diag::autodiff_stored_property_no_corresponding_tangent,
             sei->getStructDecl()->getNameStr(),
             sei->getField()->getNameStr());
         errorOccurred = true;
         return;
       }
       tanField = cast<VarDecl>(candidates.front());
     }

     // Accumulate adjoint for the `struct_extract` operand.
     auto av = getAdjointValue(bb, sei);
     switch (av.getKind()) {
     case AdjointValueKind::Zero:
       addAdjointValue(bb, sei->getOperand(),
                       makeZeroAdjointValue(tangentVectorSILTy), sei->getLoc());
       break;
     case AdjointValueKind::Concrete:
     case AdjointValueKind::Aggregate: {
       // Build an aggregate with `av` in the slot of `tanField` and a zero in
       // every other stored property of the tangent struct.
       SmallVector<AdjointValue, 8> tangentElts;
       for (auto *tanProperty : tangentVectorDecl->getStoredProperties()) {
         if (tanProperty == tanField) {
           tangentElts.push_back(av);
           continue;
         }
         auto memberSubs = tangentVectorTy->getMemberSubstitutionMap(
             tanProperty->getModuleContext(), tanProperty);
         auto propertyTy = tanProperty->getType().subst(memberSubs);
         auto propertySILTy = getContext().getTypeConverter().getLoweredType(
             propertyTy, ResilienceExpansion::Minimal);
         assert(propertySILTy.isObject());
         tangentElts.push_back(makeZeroAdjointValue(propertySILTy));
       }
       addAdjointValue(
           bb, sei->getOperand(),
           makeAggregateAdjointValue(tangentVectorSILTy, tangentElts),
           sei->getLoc());
     }
     }
   }

   /// Handle `tuple` instruction.
   ///   Original: y = tuple (x0, x1, x2, ...)
   ///    Adjoint: adj[x0] += tuple_extract adj[y], 0
   ///             ...
   ///
   /// Elements without a tangent space are skipped, so adjoint element indices
   /// count only elements that have one. A concrete adjoint that is not itself
   /// a tuple is passed on unchanged to every element with a tangent space.
   void visitTupleInst(TupleInst *ti) {
     auto *bb = ti->getParent();
     auto loc = ti->getLoc();
     auto av = getAdjointValue(bb, ti);
     switch (av.getKind()) {
     case AdjointValueKind::Zero:
       for (auto eltVal : ti->getElements()) {
         if (!getTangentSpace(eltVal->getType().getASTType()))
           continue;
         addAdjointValue(bb, eltVal,
             makeZeroAdjointValue(getRemappedTangentType(eltVal->getType())),
             loc);
       }
       break;
     case AdjointValueKind::Concrete: {
       auto val = av.getConcreteValue();
       if (!val->getType().is<TupleType>()) {
         // The adjoint is not a tuple, so there is nothing to destructure:
         // emitting a `destructure_tuple` on it would be ill-formed.
         for (auto i : range(ti->getNumOperands())) {
           if (!getTangentSpace(ti->getOperand(i)->getType().getASTType()))
             continue;
           addAdjointValue(bb, ti->getOperand(i),
                           makeConcreteAdjointValue(val), loc);
         }
         break;
       }
       auto *elts = builder.createDestructureTuple(loc, val);
       unsigned adjIdx = 0;
       for (auto i : range(ti->getNumOperands())) {
         if (!getTangentSpace(ti->getOperand(i)->getType().getASTType()))
           continue;
         addAdjointValue(bb, ti->getOperand(i),
                         makeConcreteAdjointValue(elts->getResult(adjIdx++)),
                         loc);
       }
       break;
     }
     case AdjointValueKind::Aggregate: {
       // Braces keep `adjIdx` scoped to this case.
       unsigned adjIdx = 0;
       for (auto i : range(ti->getElements().size())) {
         if (!getTangentSpace(ti->getElement(i)->getType().getASTType()))
           continue;
         addAdjointValue(bb, ti->getElement(i), av.getAggregateElement(adjIdx++),
                         loc);
       }
       break;
     }
     }
   }

   /// Handle `tuple_extract` instruction.
   ///   Original: y = tuple_extract x, <n>
   ///    Adjoint: adj[x] += tuple (0, 0, ..., adj[y], ..., 0, 0)
   ///                                         ^~~~~~
   ///                            n'-th element, where n' is tuple tangent space
   ///                            index corresponding to n
   ///
   /// Tuple elements without a tangent space do not appear in the tangent
   /// tuple. If the operand's tangent type is not a tuple, or only one element
   /// has a tangent space, `adj[y]` is accumulated into `adj[x]` directly.
   void visitTupleExtractInst(TupleExtractInst *tei) {
     auto *bb = tei->getParent();
     auto tupleTanTy = getRemappedTangentType(tei->getOperand()->getType());
     auto av = getAdjointValue(bb, tei);
     switch (av.getKind()) {
     case AdjointValueKind::Zero:
       addAdjointValue(bb, tei->getOperand(), makeZeroAdjointValue(tupleTanTy),
                       tei->getLoc());
       break;
     case AdjointValueKind::Aggregate:
     case AdjointValueKind::Concrete: {
       auto tupleTy = tei->getTupleType();
       auto tupleTanTupleTy = tupleTanTy.getAs<TupleType>();
       if (!tupleTanTupleTy) {
         addAdjointValue(bb, tei->getOperand(), av, tei->getLoc());
         break;
       }
       SmallVector<AdjointValue, 8> elements;
       // Index into the tangent tuple; counts only elements with a tangent
       // space.
       unsigned adjIdx = 0;
       for (unsigned i : range(tupleTy->getNumElements())) {
         if (!getTangentSpace(
                 tupleTy->getElement(i).getType()->getCanonicalType()))
           continue;
         // Every element with a tangent space occupies one tangent tuple slot,
         // including the extracted one. Advance the index unconditionally so
         // that zeros after the extracted field get the right element type.
         unsigned tanEltIdx = adjIdx++;
         if (tei->getFieldNo() == i) {
           elements.push_back(av);
           continue;
         }
         elements.push_back(makeZeroAdjointValue(
             getRemappedTangentType(SILType::getPrimitiveObjectType(
                 tupleTanTupleTy->getElementType(tanEltIdx)
                     ->getCanonicalType()))));
       }
       if (elements.size() == 1) {
         addAdjointValue(bb, tei->getOperand(), elements.front(), tei->getLoc());
         break;
       }
       addAdjointValue(bb, tei->getOperand(),
           makeAggregateAdjointValue(tupleTanTy, elements), tei->getLoc());
       break;
     }
     }
   }

   /// Handle `destructure_tuple` instruction.
   ///   Original: (y0, ..., yn) = destructure_tuple x
   ///    Adjoint: adj[x].0 += adj[y0]
   ///             ...
   ///             adj[x].n += adj[yn]
   ///
   /// Results whose type has no tangent space contribute no element.
   void visitDestructureTupleInst(DestructureTupleInst *dti) {
     auto *bb = dti->getParent();
     auto operandTanTy = getRemappedTangentType(dti->getOperand()->getType());
     // Gather the adjoints of all results that have a tangent space, in result
     // order.
     SmallVector<AdjointValue, 8> resultAdjoints;
     for (auto result : dti->getResults()) {
       if (getTangentSpace(result->getType().getASTType()))
         resultAdjoints.push_back(getAdjointValue(bb, result));
     }
     auto operandAdjoint =
         makeAggregateAdjointValue(operandTanTy, resultAdjoints);
     addAdjointValue(bb, dti->getOperand(), operandAdjoint, dti->getLoc());
   }

   /// Handle `load` or `load_borrow` instruction
   ///   Original: y = load/load_borrow x
   ///    Adjoint: adj[x] += adj[y]
   ///
   /// The adjoint value of `y` is copied into a temporary stack buffer, which
   /// is accumulated into the adjoint buffer of `x` and then destroyed and
   /// deallocated.
   void visitLoadOperation(SingleValueInstruction *inst) {
     assert(isa<LoadInst>(inst) || isa<LoadBorrowInst>(inst));
     auto *bb = inst->getParent();
     auto loc = inst->getLoc();
     auto resultAdjoint =
         materializeAdjointDirect(getAdjointValue(bb, inst), loc);
     // Allocate a local buffer and store a copy of the adjoint value. This
     // buffer is the source for accumulation into the adjoint buffer.
     auto *scratch = builder.createAllocStack(loc, resultAdjoint->getType());
     auto adjointCopy = builder.emitCopyValueOperation(loc, resultAdjoint);
     builder.emitStoreValueOperation(loc, adjointCopy, scratch,
                                     StoreOwnershipQualifier::Init);
     // Accumulate the adjoint value in the local buffer into the adjoint buffer
     // of the loaded-from address.
     addToAdjointBuffer(bb, inst->getOperand(0), scratch, loc);
     // Clean up the local buffer.
     builder.emitDestroyAddr(loc, scratch);
     builder.createDeallocStack(loc, scratch);
   }
   void visitLoadInst(LoadInst *li) { visitLoadOperation(li); }
   void visitLoadBorrowInst(LoadBorrowInst *lbi) { visitLoadOperation(lbi); }

   /// Handle `store` or `store_borrow` instruction.
   ///   Original: store/store_borrow x to y
   ///    Adjoint: adj[x] += load adj[y]; adj[y] = 0
   /// \param bb the block containing the original instruction.
   /// \param loc the location used for all emitted instructions.
   /// \param origSrc the stored value `x` of the original instruction.
   /// \param origDest the destination address `y` of the original instruction.
   void visitStoreOperation(SILBasicBlock *bb, SILLocation loc,
                            SILValue origSrc, SILValue origDest) {
     auto &destAdjBuf = getAdjointBuffer(bb, origDest);
     auto destAdjTy = remapType(destAdjBuf->getType());
     // Take the current contents out of adj[y] ...
     auto takenAdjoint = builder.emitLoadValueOperation(
         loc, destAdjBuf, LoadOwnershipQualifier::Take);
     recordTemporary(takenAdjoint);
     // ... accumulate them into adj[x] ...
     auto concreteAdjoint = makeConcreteAdjointValue(takenAdjoint);
     addAdjointValue(bb, origSrc, concreteAdjoint, loc);
     // ... and re-initialize adj[y] with zero.
     emitZeroIndirect(destAdjTy.getASTType(), destAdjBuf, loc);
   }
   /// `store` is handled by `visitStoreOperation`.
   void visitStoreInst(StoreInst *si) {
     visitStoreOperation(si->getParent(), si->getLoc(), si->getSrc(),
                         si->getDest());
   }
   /// `store_borrow` is handled by `visitStoreOperation`.
   void visitStoreBorrowInst(StoreBorrowInst *sbi) {
     visitStoreOperation(sbi->getParent(), sbi->getLoc(), sbi->getSrc(),
                         sbi->getDest());
   }

   /// Handle `copy_addr` instruction.
   ///   Original: copy_addr x to y
   ///    Adjoint: adj[x] += adj[y]; adj[y] = 0
   void visitCopyAddrInst(CopyAddrInst *cai) {
     auto *parentBB = cai->getParent();
     auto loc = cai->getLoc();
     auto &destAdjBuf = getAdjointBuffer(parentBB, cai->getDest());
     auto destAdjTy = remapType(destAdjBuf->getType());
     // adj[x] += adj[y]
     addToAdjointBuffer(parentBB, cai->getSrc(), destAdjBuf, loc);
     // adj[y] = 0: destroy the old contents, then re-initialize with zero.
     builder.emitDestroyAddrAndFold(loc, destAdjBuf);
     emitZeroIndirect(destAdjTy.getASTType(), destAdjBuf, loc);
   }

   /// Handle `copy_value` instruction.
   ///   Original: y = copy_value x
   ///    Adjoint: adj[x] += adj[y]
   void visitCopyValueInst(CopyValueInst *cvi) {
     auto *parentBB = cvi->getParent();
     // The adjoint of the copy is forwarded unchanged to the copied operand.
     auto resultAdjoint = getAdjointValue(parentBB, cvi);
     addAdjointValue(parentBB, cvi->getOperand(), resultAdjoint,
                     cvi->getLoc());
   }

   /// Handle `begin_borrow` instruction.
   ///   Original: y = begin_borrow x
   ///    Adjoint: adj[x] += adj[y]
   void visitBeginBorrowInst(BeginBorrowInst *bbi) {
     auto *parentBB = bbi->getParent();
     // The adjoint of the borrow is forwarded unchanged to the borrowed value.
     auto resultAdjoint = getAdjointValue(parentBB, bbi);
     addAdjointValue(parentBB, bbi->getOperand(), resultAdjoint,
                     bbi->getLoc());
   }

   /// Handle `begin_access` instruction.
   ///   Original: y = begin_access x
   ///    Adjoint: nothing
   ///
   /// No adjoint code is emitted. A `modify` access whose source is a
   /// `global_addr` or a `project_box` is diagnosed as a non-differentiable
   /// write, and `errorOccurred` is set.
   void visitBeginAccessInst(BeginAccessInst *bai) {
     // Only `modify` accesses are checked for non-differentiable writes.
     if (bai->getAccessKind() != SILAccessKind::Modify)
       return;
     // Write to a global variable.
     if (isa<GlobalAddrInst>(bai->getSource())) {
       getContext().emitNondifferentiabilityError(bai, getInvoker(),
           diag::autodiff_cannot_differentiate_writes_to_global_variables);
       errorOccurred = true;
       return;
     }
     // Write to a box projection (diagnosed as a mutable capture).
     if (isa<ProjectBoxInst>(bai->getSource())) {
       getContext().emitNondifferentiabilityError(bai, getInvoker(),
           diag::autodiff_cannot_differentiate_writes_to_mutable_captures);
       errorOccurred = true;
       return;
     }
   }

   /// Handle `unconditional_checked_cast_addr` instruction.
   ///   Original: y = unconditional_checked_cast_addr x
   ///    Adjoint: adj[x] += unconditional_checked_cast_addr adj[y]
   /// After the accumulation, adj[y] is re-initialized with zero.
   void visitUnconditionalCheckedCastAddrInst(
       UnconditionalCheckedCastAddrInst *uccai) {
     auto *parentBB = uccai->getParent();
     auto loc = uccai->getLoc();
     auto &destAdjBuf = getAdjointBuffer(parentBB, uccai->getDest());
     auto &srcAdjBuf = getAdjointBuffer(parentBB, uccai->getSrc());
     auto destAdjTy = remapType(destAdjBuf->getType());
     // Cast adj[y] into a temporary buffer that has the type of adj[x].
     auto tmpCastBuf = builder.createAllocStack(loc, srcAdjBuf->getType());
     builder.createUnconditionalCheckedCastAddr(
         loc, destAdjBuf, destAdjBuf->getType().getASTType(), tmpCastBuf,
         srcAdjBuf->getType().getASTType());
     // adj[x] += casted adj[y]
     addToAdjointBuffer(parentBB, uccai->getSrc(), tmpCastBuf, loc);
     // Tear down the temporary buffer.
     builder.emitDestroyAddrAndFold(loc, tmpCastBuf);
     builder.createDeallocStack(loc, tmpCastBuf);
     // adj[y] = 0
     emitZeroIndirect(destAdjTy.getASTType(), destAdjBuf, loc);
   }

 // Defines `visit<INST>Inst` for an instruction kind that cannot be
 // differentiated: the visitor emits the non-differentiability diagnostic
 // `diag::DIAG` for the instruction and sets `errorOccurred`.
 #define NOT_DIFFERENTIABLE(INST, DIAG) \
   void visit##INST##Inst(INST##Inst *inst) { \
     getContext().emitNondifferentiabilityError( \
         inst, getInvoker(), diag::DIAG); \
     errorOccurred = true; \
   }
   NOT_DIFFERENTIABLE(RefElementAddr, autodiff_class_property_not_supported)
 #undef NOT_DIFFERENTIABLE

 #define NO_ADJOINT(INST) \
   void visit##INST##Inst(INST##Inst *inst) {}
   // Terminators.
   NO_ADJOINT(Return)
   NO_ADJOINT(Branch)
   NO_ADJOINT(CondBranch)

   // Buffer projection.
   NO_ADJOINT(StructElementAddr)
   NO_ADJOINT(TupleElementAddr)

   // Memory allocation/access.
   NO_ADJOINT(AllocStack)
   NO_ADJOINT(DeallocStack)
   NO_ADJOINT(EndAccess)

   // Debugging/reference counting instructions.
   NO_ADJOINT(DebugValue)
   NO_ADJOINT(DebugValueAddr)
   NO_ADJOINT(RetainValue)
   NO_ADJOINT(RetainValueAddr)
   NO_ADJOINT(ReleaseValue)
   NO_ADJOINT(ReleaseValueAddr)
   NO_ADJOINT(StrongRetain)
   NO_ADJOINT(StrongRelease)
   NO_ADJOINT(UnownedRetain)
   NO_ADJOINT(UnownedRelease)
   NO_ADJOINT(StrongRetainUnowned)
   NO_ADJOINT(DestroyValue)
   NO_ADJOINT(DestroyAddr)

   // Value ownership.
   NO_ADJOINT(EndBorrow)
 #undef NO_DERIVATIVE
 };
 } // end anonymous namespace

 /// Creates a symbolic zero adjoint value of the remapped `type`.
 AdjointValue PullbackEmitter::makeZeroAdjointValue(SILType type) {
   auto remappedType = remapType(type);
   return AdjointValue::createZero(allocator, remappedType);
 }

 /// Wraps an existing SIL value as a concrete adjoint value.
 AdjointValue PullbackEmitter::makeConcreteAdjointValue(SILValue value) {
   auto concrete = AdjointValue::createConcrete(allocator, value);
   return concrete;
 }

 /// Creates a symbolic aggregate adjoint value of the remapped `type` from the
 /// given range of element adjoint values.
 template<typename EltRange>
 AdjointValue PullbackEmitter::makeAggregateAdjointValue(
     SILType type, EltRange elements) {
   auto remappedType = remapType(type);
   return AdjointValue::createAggregate(allocator, remappedType, elements);
 }

 /// Materializes an adjoint value of object type as a SIL value.
 /// - Concrete values are returned as-is.
 /// - Zero values are emitted via `emitZeroDirect`.
 /// - Aggregate values are materialized element by element; copies of the
 ///   elements are combined with a `tuple` or `struct` instruction.
 /// Newly created zero and aggregate values are recorded as temporaries.
 SILValue PullbackEmitter::materializeAdjointDirect(
     AdjointValue val, SILLocation loc) {
   assert(val.getType().isObject());
   LLVM_DEBUG(getADDebugStream() <<
              "Materializing adjoints for " << val << '\n');
   switch (val.getKind()) {
   case AdjointValueKind::Concrete:
     return val.getConcreteValue();
   case AdjointValueKind::Zero: {
     auto zeroValue = emitZeroDirect(val.getType().getASTType(), loc);
     return recordTemporary(zeroValue);
   }
   case AdjointValueKind::Aggregate: {
     SmallVector<SILValue, 8> fieldValues;
     for (auto idx : range(val.getNumAggregateElements())) {
       auto fieldValue =
           materializeAdjointDirect(val.getAggregateElement(idx), loc);
       fieldValues.push_back(builder.emitCopyValueOperation(loc, fieldValue));
     }
     auto aggregateTy = val.getType();
     if (aggregateTy.is<TupleType>())
       return recordTemporary(
           builder.createTuple(loc, aggregateTy, fieldValues));
     return recordTemporary(
         builder.createStruct(loc, aggregateTy, fieldValues));
   }
   }
 }

 /// Materializes an adjoint value as a SIL value. A concrete value is
 /// returned directly; any other kind is forwarded to
 /// `materializeAdjointDirect`.
 SILValue PullbackEmitter::materializeAdjoint(AdjointValue val,
                                              SILLocation loc) {
   if (!val.isConcrete()) {
     LLVM_DEBUG(getADDebugStream()
                << "Materializing adjoint: Value is non-concrete. "
                   "Materializing directly.\n");
     return materializeAdjointDirect(val, loc);
   }
   LLVM_DEBUG(getADDebugStream()
              << "Materializing adjoint: Value is concrete.\n");
   return val.getConcreteValue();
 }

 /// Materializes an adjoint value into the buffer `destBufferAccess`.
 /// - Zero: the buffer is initialized via `emitZeroIndirect`.
 /// - Aggregate: for a `%buf : *(T0, T1, T2, ...)` or `%buf : *Struct`, each
 ///   element is recursively materialized into the corresponding
 ///   `tuple_element_addr` / `struct_element_addr` projection of the buffer.
 /// - Concrete: the value is already materialized and is stored into the
 ///   buffer with `[init]` ownership.
 void PullbackEmitter::materializeAdjointIndirect(
     AdjointValue val, SILValue destBufferAccess, SILLocation loc) {
   switch (val.getKind()) {
   case AdjointValueKind::Zero:
     emitZeroIndirect(val.getSwiftType(), destBufferAccess, loc);
     return;
   case AdjointValueKind::Aggregate: {
     // Tuple: fill each element buffer in order.
     if (auto *tupleTy = val.getSwiftType()->getAs<TupleType>()) {
       for (auto eltIdx : range(val.getNumAggregateElements())) {
         auto eltAddrTy = SILType::getPrimitiveAddressType(
             tupleTy->getElementType(eltIdx)->getCanonicalType());
         auto *eltAddr = builder.createTupleElementAddr(
             loc, destBufferAccess, eltIdx, eltAddrTy);
         materializeAdjointIndirect(
             val.getAggregateElement(eltIdx), eltAddr, loc);
       }
       return;
     }
     // Struct: fill each stored-property buffer in declaration order.
     auto *structDecl = val.getSwiftType()->getStructOrBoundGenericStruct();
     if (!structDecl)
       llvm_unreachable("Not an aggregate type");
     unsigned fieldIdx = 0;
     for (auto *field : structDecl->getStoredProperties()) {
       auto fieldAddr =
           builder.createStructElementAddr(loc, destBufferAccess, field);
       materializeAdjointIndirect(
           val.getAggregateElement(fieldIdx), fieldAddr, loc);
       ++fieldIdx;
     }
     return;
   }
   case AdjointValueKind::Concrete: {
     // Value is already materialized: just store it.
     builder.emitStoreValueOperation(loc, val.getConcreteValue(),
                                     destBufferAccess,
                                     StoreOwnershipQualifier::Init);
     return;
   }
   }
 }

 /// Initializes the buffer `bufferAccess` with the zero of `type`.
 /// - Vector tangent space: forwards to `emitZeroIntoBuffer`.
 /// - Tuple tangent space: recursively zero-initializes each
 ///   `tuple_element_addr` projection of the buffer.
 /// - Function tangent space: unimplemented.
 /// `type` must have a tangent space.
 void PullbackEmitter::emitZeroIndirect(CanType type, SILValue bufferAccess,
                                        SILLocation loc) {
   auto tangentSpace = getTangentSpace(type);
   assert(tangentSpace && "No tangent space for this type");
   switch (tangentSpace->getKind()) {
   case VectorSpace::Kind::Vector:
     emitZeroIntoBuffer(builder, type, bufferAccess, loc);
     return;
   case VectorSpace::Kind::Tuple: {
     auto tupleType = tangentSpace->getTuple();
     for (unsigned i : range(tupleType->getNumElements())) {
       auto eltAddr = builder.createTupleElementAddr(loc, bufferAccess, i);
       emitZeroIndirect(tupleType->getElementType(i)->getCanonicalType(),
                        eltAddr, loc);
     }
     return;
   }
   case VectorSpace::Kind::Function: {
     llvm_unreachable(
       "Unimplemented: Emit thunks for abstracting zero initialization");
   }
   }
 }

 /// Emits a zero of `type` as an object value: the zero is written into a
 /// temporary stack buffer via `emitZeroIndirect` and then loaded with `[take]`
 /// ownership.
 SILValue PullbackEmitter::emitZeroDirect(CanType type, SILLocation loc) {
   auto loweredTy = getModule().Types.getLoweredLoadableType(
       type, ResilienceExpansion::Minimal, getModule());
   auto *zeroBuf = builder.createAllocStack(loc, loweredTy);
   emitZeroIndirect(type, zeroBuf, loc);
   auto zeroValue = builder.emitLoadValueOperation(
       loc, zeroBuf, LoadOwnershipQualifier::Take);
   builder.createDeallocStack(loc, zeroBuf);
   return zeroValue;
 }

 /// Accumulates two symbolic adjoint values and returns their sum as a new
 /// adjoint value. Code is emitted only where a concrete value is involved:
 /// - concrete + concrete: emits the sum via `accumulateDirect`.
 /// - anything + zero (or zero + anything): returns the non-zero side.
 /// - concrete + aggregate: destructures a copy of the concrete value and
 ///   accumulates element-wise.
 /// - aggregate + concrete: handled by the symmetric concrete + aggregate
 ///   case with the operands swapped.
 /// - aggregate + aggregate: accumulates element-wise.
 AdjointValue
 PullbackEmitter::accumulateAdjointsDirect(AdjointValue lhs, AdjointValue rhs,
                                           SILLocation loc) {
   LLVM_DEBUG(getADDebugStream()
              << "Materializing adjoint directly.\nLHS: " << lhs
              << "\nRHS: " << rhs << '\n');

   switch (lhs.getKind()) {
   // x
   case AdjointValueKind::Concrete: {
     auto lhsVal = lhs.getConcreteValue();
     switch (rhs.getKind()) {
     // x + y
     case AdjointValueKind::Concrete: {
       auto rhsVal = rhs.getConcreteValue();
       auto sum = recordTemporary(accumulateDirect(lhsVal, rhsVal, loc));
       return makeConcreteAdjointValue(sum);
     }
     // x + 0 => x
     case AdjointValueKind::Zero:
       return lhs;
     // x + (y, z) => (x.0 + y, x.1 + z)
     case AdjointValueKind::Aggregate: {
       SmallVector<AdjointValue, 8> newElements;
       auto lhsTy = lhsVal->getType().getASTType();
       // Destructure a copy of the concrete value; the destructured results
       // are recorded as temporaries.
       auto lhsValCopy = builder.emitCopyValueOperation(loc, lhsVal);
       if (lhsTy->is<TupleType>()) {
         auto elts = builder.createDestructureTuple(loc, lhsValCopy);
         llvm::for_each(elts->getResults(),
                        [this](SILValue result) { recordTemporary(result); });
         for (auto i : indices(elts->getResults())) {
           auto rhsElt = rhs.getAggregateElement(i);
           newElements.push_back(accumulateAdjointsDirect(
               makeConcreteAdjointValue(elts->getResult(i)), rhsElt, loc));
         }
       } else if (lhsTy->getStructOrBoundGenericStruct()) {
         auto elts =
             builder.createDestructureStruct(lhsVal.getLoc(), lhsValCopy);
         llvm::for_each(elts->getResults(),
                        [this](SILValue result) { recordTemporary(result); });
         for (unsigned i : indices(elts->getResults())) {
           auto rhsElt = rhs.getAggregateElement(i);
           newElements.push_back(
               accumulateAdjointsDirect(
                   makeConcreteAdjointValue(elts->getResult(i)), rhsElt, loc));
         }
       } else {
         llvm_unreachable("Not an aggregate type");
       }
       return makeAggregateAdjointValue(lhsVal->getType(), newElements);
     }
     }
     llvm_unreachable("Invalid adjoint value kind");
   }
   // 0
   case AdjointValueKind::Zero:
     // 0 + x => x
     return rhs;
   // (x, y)
   case AdjointValueKind::Aggregate:
     switch (rhs.getKind()) {
     // (x, y) + z => (z.0 + x, z.1 + y)
     // Swap the operands and reuse the concrete + aggregate case, instead of
     // falling through to the zero case and dropping `z`.
     case AdjointValueKind::Concrete:
       return accumulateAdjointsDirect(rhs, lhs, loc);
     // x + 0 => x
     case AdjointValueKind::Zero:
       return lhs;
     // (x, y) + (z, w) => (x + z, y + w)
     case AdjointValueKind::Aggregate: {
       SmallVector<AdjointValue, 8> newElements;
       for (auto i : range(lhs.getNumAggregateElements()))
         newElements.push_back(
             accumulateAdjointsDirect(lhs.getAggregateElement(i),
                                      rhs.getAggregateElement(i),
                                      loc));
       return makeAggregateAdjointValue(lhs.getType(), newElements);
     }
     }
     llvm_unreachable("Invalid adjoint value kind");
   }
   llvm_unreachable("Invalid adjoint value kind");
 }

 /// Emits code that adds two adjoint values of the same object type and
 /// returns the sum.
 /// - Vector tangent space: copies of the operands are stored into temporary
 ///   stack buffers, added via the three-buffer `accumulateIndirect`, and the
 ///   result is loaded with `[take]` ownership.
 /// - Tuple tangent space: copies of the operands are destructured and added
 ///   element-wise; the sums are combined with a `tuple` instruction.
 /// - Function tangent space: unimplemented.
 SILValue PullbackEmitter::accumulateDirect(SILValue lhs, SILValue rhs,
                                            SILLocation loc) {
   // TODO: Optimize for the case when lhs == rhs.
   LLVM_DEBUG(getADDebugStream() <<
              "Emitting adjoint accumulation for lhs: " << lhs <<
              " and rhs: " << rhs << "\n");
   assert(lhs->getType() == rhs->getType() && "Adjoints must have equal types!");
   assert(lhs->getType().isObject() && rhs->getType().isObject() &&
          "Adjoint types must be both object types!");
   auto sumTy = lhs->getType();
   auto space = getTangentSpace(sumTy.getASTType());
   // Both operands are copied before being consumed below.
   auto lhsOwned = builder.emitCopyValueOperation(loc, lhs);
   auto rhsOwned = builder.emitCopyValueOperation(loc, rhs);
   assert(space && "No tangent space for this type");
   switch (space->getKind()) {
   case VectorSpace::Kind::Vector: {
     // Stack buffers for the result and the two operands.
     auto *sumBuf = builder.createAllocStack(loc, sumTy);
     auto *lhsTmpBuf = builder.createAllocStack(loc, sumTy);
     auto *rhsTmpBuf = builder.createAllocStack(loc, sumTy);
     // Move the operand copies into their buffers.
     builder.emitStoreValueOperation(loc, lhsOwned, lhsTmpBuf,
                                     StoreOwnershipQualifier::Init);
     builder.emitStoreValueOperation(loc, rhsOwned, rhsTmpBuf,
                                     StoreOwnershipQualifier::Init);
     accumulateIndirect(sumBuf, lhsTmpBuf, rhsTmpBuf, loc);
     // Clean up the operand buffers (deallocated in reverse allocation order).
     builder.emitDestroyAddr(loc, lhsTmpBuf);
     builder.emitDestroyAddr(loc, rhsTmpBuf);
     builder.createDeallocStack(loc, rhsTmpBuf);
     builder.createDeallocStack(loc, lhsTmpBuf);
     // Take the sum out of the result buffer and release the buffer.
     auto sum = builder.emitLoadValueOperation(
         loc, sumBuf, LoadOwnershipQualifier::Take);
     builder.createDeallocStack(loc, sumBuf);
     return sum;
   }
   case VectorSpace::Kind::Tuple: {
     auto lhsParts = builder.createDestructureTuple(loc, lhsOwned)->getResults();
     auto rhsParts = builder.createDestructureTuple(loc, rhsOwned)->getResults();
     SmallVector<SILValue, 8> partSums;
     for (auto pair : llvm::zip(lhsParts, rhsParts)) {
       auto partSum =
           accumulateDirect(std::get<0>(pair), std::get<1>(pair), loc);
       partSums.push_back(partSum);
     }
     return builder.createTuple(loc, sumTy, partSums);
   }
   case VectorSpace::Kind::Function: {
     llvm_unreachable(
         "Unimplemented: Emit thunks for abstracting adjoint accumulation");
   }
   }
 }

 /// Emits code that adds the adjoint values stored in `lhsBufAccess` and
 /// `rhsBufAccess` and writes the sum into `resultBufAccess`.
 /// - Vector tangent space: applies the `AdditiveArithmetic` `+` witness.
 /// - Tuple tangent space: recurses over the `tuple_element_addr` projections.
 /// - Function tangent space: unimplemented.
 /// Both operand buffers must have the same address type.
 void PullbackEmitter::accumulateIndirect(
     SILValue resultBufAccess, SILValue lhsBufAccess, SILValue rhsBufAccess,
     SILLocation loc) {
   // TODO: Optimize for the case when lhs == rhs.
   assert(lhsBufAccess->getType() == rhsBufAccess->getType() &&
          "Adjoint values must have same type!");
   assert(lhsBufAccess->getType().isAddress() &&
          rhsBufAccess->getType().isAddress() &&
          "Adjoint values must both have address types!");
   auto operandASTTy = lhsBufAccess->getType().getASTType();
   auto *swiftMod = getModule().getSwiftModule();
   auto space = operandASTTy->getAutoDiffAssociatedTangentSpace(
       LookUpConformanceInModule(swiftMod));
   assert(space && "No tangent space for this type");
   switch (space->getKind()) {
   case VectorSpace::Kind::Vector: {
     auto *additiveProto = getContext().getAdditiveArithmeticProtocol();
     auto *plusDecl = getContext().getPlusDecl();
     // Look up the conformance in the module of the tangent space's nominal
     // type when there is one, and in the current Swift module otherwise.
     auto *conformanceModule = swiftMod;
     if (auto *nominal = space->getNominal())
       conformanceModule = nominal->getModuleContext();
     auto conformance =
         conformanceModule->lookupConformance(operandASTTy, additiveProto);
     assert(conformance.hasValue() &&
            "Missing conformance to `AdditiveArithmetic`");
     SILDeclRef plusRef(plusDecl, SILDeclRef::Kind::Func);
     auto plusFnTy = getContext().getTypeConverter().getConstantType(plusRef);
     // %0 = witness_method @+
     auto plusWitness = builder.createWitnessMethod(
         loc, operandASTTy, *conformance, plusRef, plusFnTy);
     auto substitutions = SubstitutionMap::getProtocolSubstitutions(
         additiveProto, operandASTTy, *conformance);
     // %1 = metatype $T.Type
     auto selfMetatypeTy = SILType::getPrimitiveObjectType(
         CanMetatypeType::get(operandASTTy, MetatypeRepresentation::Thick));
     auto selfMetatype = builder.createMetatype(loc, selfMetatypeTy);
     // %2 = apply $0(%result, %new, %old, %1)
     builder.createApply(
         loc, plusWitness, substitutions,
         {resultBufAccess, rhsBufAccess, lhsBufAccess, selfMetatype},
         /*isNonThrowing*/ false);
     builder.emitDestroyValueOperation(loc, plusWitness);
     return;
   }
   case VectorSpace::Kind::Tuple: {
     auto tupleTy = space->getTuple();
     for (unsigned eltIdx : range(tupleTy->getNumElements())) {
       auto *resultElt =
           builder.createTupleElementAddr(loc, resultBufAccess, eltIdx);
       auto *lhsElt = builder.createTupleElementAddr(loc, lhsBufAccess, eltIdx);
       auto *rhsElt = builder.createTupleElementAddr(loc, rhsBufAccess, eltIdx);
       accumulateIndirect(resultElt, lhsElt, rhsElt, loc);
     }
     return;
   }
   case VectorSpace::Kind::Function: {
     llvm_unreachable(
         "Unimplemented: Emit thunks for abstracting adjoint value "
         "accumulation");
   }
   }
 }

 /// Emits code that adds the adjoint value stored in `rhsAccess` into the
 /// buffer `lhsDestAccess` in place.
 /// - Vector tangent space: applies the `AdditiveArithmetic` `+=` witness.
 /// - Tuple tangent space: recurses over the `tuple_element_addr` projections.
 /// - Function tangent space: unimplemented.
 /// Both buffers must be addresses that belong to the pullback function.
 void PullbackEmitter::accumulateIndirect(SILValue lhsDestAccess,
                                          SILValue rhsAccess, SILLocation loc) {
   assert(lhsDestAccess->getType().isAddress() &&
          rhsAccess->getType().isAddress());
   assert(lhsDestAccess->getFunction() == &getPullback());
   assert(rhsAccess->getFunction() == &getPullback());
   auto destASTTy = lhsDestAccess->getType().getASTType();
   auto *swiftMod = getModule().getSwiftModule();
   auto space = destASTTy->getAutoDiffAssociatedTangentSpace(
       LookUpConformanceInModule(swiftMod));
   assert(space && "No tangent space for this type");
   switch (space->getKind()) {
   case VectorSpace::Kind::Vector: {
     auto *additiveProto = getContext().getAdditiveArithmeticProtocol();
     auto *plusEqualDecl = getContext().getPlusEqualDecl();
     // The conformance is looked up in the current Swift module.
     auto conformance = swiftMod->lookupConformance(destASTTy, additiveProto);
     assert(conformance.hasValue() &&
            "Missing conformance to `AdditiveArithmetic`");
     SILDeclRef plusEqualRef(plusEqualDecl, SILDeclRef::Kind::Func);
     auto plusEqualFnTy =
         getContext().getTypeConverter().getConstantType(plusEqualRef);
     // %0 = witness_method @+=
     auto plusEqualWitness = builder.createWitnessMethod(
         loc, destASTTy, *conformance, plusEqualRef, plusEqualFnTy);
     auto substitutions = SubstitutionMap::getProtocolSubstitutions(
         additiveProto, destASTTy, *conformance);
     // %1 = metatype $T.Type
     auto selfMetatypeTy = SILType::getPrimitiveObjectType(
         CanMetatypeType::get(destASTTy, MetatypeRepresentation::Thick));
     auto selfMetatype = builder.createMetatype(loc, selfMetatypeTy);
     // %2 = apply $0(%lhs, %rhs, %1)
     builder.createApply(loc, plusEqualWitness, substitutions,
                         {lhsDestAccess, rhsAccess, selfMetatype},
                         /*isNonThrowing*/ false);
     builder.emitDestroyValueOperation(loc, plusEqualWitness);
     return;
   }
   case VectorSpace::Kind::Tuple: {
     auto tupleTy = space->getTuple();
     for (unsigned eltIdx : range(tupleTy->getNumElements())) {
       auto *destElt =
           builder.createTupleElementAddr(loc, lhsDestAccess, eltIdx);
       auto *rhsElt = builder.createTupleElementAddr(loc, rhsAccess, eltIdx);
       accumulateIndirect(destElt, rhsElt, loc);
     }
     return;
   }
   case VectorSpace::Kind::Function: {
     llvm_unreachable(
         "Unimplemented: Emit thunks for abstracting adjoint value "
         "accumulation");
   }
   }
 }

 bool VJPEmitter::run() {
   LLVM_DEBUG(getADDebugStream()
              << "Cloning original @" << original->getName()
              << " to vjp @" << vjp->getName() << '\n');
   // Create entry BB and arguments.
   auto *entry = vjp->createBasicBlock();
   createEntryArguments(vjp);

   // Clone.
   SmallVector<SILValue, 4> entryArgs(entry->getArguments().begin(),
                                      entry->getArguments().end());
   cloneFunctionBody(original, entry, entryArgs);
   // If errors occurred, back out.
   if (errorOccurred)
     return true;

   // Each `@guaranteed` trampoline argument needs to have a lifetime-ending use
   // past its destination argument's lifetime-ending uses (aka. `end_borrow`).
   // `trampolinedGuaranteedPhiArguments` tracks all `@guaranteed` trampoline
   // arguments. We emit an `end_borrow` immediately past each destination
   // argument's lifetime-ending uses.
   for (auto &trampolinedArgPair : trampolinedGuaranteedPhiArguments) {
     for (auto *destArgUse : trampolinedArgPair.destinationArgument->getUses()) {
       if (auto *lifetimeEnd = dyn_cast<EndBorrowInst>(destArgUse->getUser())) {
         getBuilder().setInsertionPoint(lifetimeEnd->getParentBlock(),
                                        std::next(lifetimeEnd->getIterator()));
         getBuilder().emitEndBorrowOperation(
             lifetimeEnd->getLoc(), trampolinedArgPair.trampolineArgument);
       }
     }
   }

   // Generate pullback code.
   PullbackEmitter PullbackEmitter(*this);
   if (PullbackEmitter.run()) {
     errorOccurred = true;
     return true;
   }
   LLVM_DEBUG(getADDebugStream() << "Generated VJP for "
                                 << original->getName() << ":\n" << *vjp);
   return errorOccurred;
 }

 //===----------------------------------------------------------------------===//
 // `[differentiable]` attribute processing
 //===----------------------------------------------------------------------===//

 /// Creates a `public_external` declaration named `name` for the derivative
 /// function of the given `kind` of `original`, with the derivative function
 /// type computed from the parameter/source indices of `attr`.
 /// Returns the newly created function.
 SILFunction *
 ADContext::declareExternalDerivativeFunction(
     SILFunction *original, SILDifferentiableAttr *attr, StringRef name,
     AutoDiffDerivativeFunctionKind kind) {
   auto &module = getModule();
   auto &indices = attr->getIndices();
   auto origFnTy = original->getLoweredFunctionType();
   auto origLoc = original->getLocation();
   auto derivativeGenSig = getDerivativeGenericSignature(attr, original);
   auto derivativeFnTy = origFnTy->getAutoDiffDerivativeFunctionType(
       indices.parameters, indices.source, kind, module.Types,
       LookUpConformanceInModule(module.getSwiftModule()), derivativeGenSig);
   // Create external function declaration.
   SILOptFunctionBuilder fb(getTransform());
   auto *externalFn = fb.createFunction(
       SILLinkage::PublicExternal, name, derivativeFnTy,
       /*genericEnv*/ nullptr, origLoc, original->isBare(), IsNotTransparent,
       original->isSerialized(), original->isDynamicallyReplaceable());
   // Note: Setting debug scope prevents crashes during later transforms.
   auto *debugScope = new (module) SILDebugScope(origLoc, externalFn);
   externalFn->setDebugScope(debugScope);
   return externalFn;
 }

 /// Creates an empty VJP function for `original` and records its name on
 /// `attr`. The name is mangled from the name of `original` and the indices of
 /// `attr`; the linkage is derived from the linkage of `original` and
 /// `isExported`.
 /// Returns the newly created function.
 static SILFunction *createEmptyVJP(
     ADContext &context, SILFunction *original, SILDifferentiableAttr *attr,
     bool isExported) {
   LLVM_DEBUG(getADDebugStream()
              << "Creating VJP:\n\t"
              << "Original type: " << original->getLoweredFunctionType()
              << "\n\t");

   auto &module = context.getModule();
   auto origFnTy = original->getLoweredFunctionType();
   auto indices = attr->getIndices();

   // === Create an empty VJP. ===
   Mangle::ASTMangler mangler;
   auto mangledName = mangler.mangleAutoDiffDerivativeFunctionHelper(
       original->getName(), AutoDiffDerivativeFunctionKind::VJP, indices);
   auto vjpName =
       original->getASTContext().getIdentifier(mangledName).str();
   auto vjpGenericSig = getDerivativeGenericSignature(attr, original);

   // RAII that pushes the derivative generic signature to `module.Types` so
   // that calls to `module.Types.getTypeLowering()` below will know the VJP's
   // generic parameter types.
   Lowering::GenericContextScope genericContextScope(
       module.Types, vjpGenericSig);

   GenericEnvironment *vjpGenericEnv = nullptr;
   if (vjpGenericSig)
     vjpGenericEnv = vjpGenericSig->getGenericEnvironment();
   auto vjpType = origFnTy->getAutoDiffDerivativeFunctionType(
       indices.parameters, indices.source, AutoDiffDerivativeFunctionKind::VJP,
       module.Types, LookUpConformanceInModule(module.getSwiftModule()),
       vjpGenericSig);

   SILOptFunctionBuilder fb(context.getTransform());
   auto vjpLinkage = autodiff::getAutoDiffDerivativeFunctionLinkage(
       original->getLinkage(), isExported);
   auto *vjp = fb.createFunction(vjpLinkage, vjpName, vjpType, vjpGenericEnv,
                                 original->getLocation(), original->isBare(),
                                 IsNotTransparent, original->isSerialized(),
                                 original->isDynamicallyReplaceable());
   vjp->setDebugScope(new (module) SILDebugScope(original->getLocation(), vjp));
   attr->setVJPName(vjpName);

   LLVM_DEBUG(llvm::dbgs() << "VJP type: " << vjp->getLoweredFunctionType()
                           << "\n");
   return vjp;
 }

 /// Creates an empty JVP function for `original` and records its name on
 /// `attr`. The name is mangled from the name of `original` and the indices of
 /// `attr`; the linkage is derived from the linkage of `original` and
 /// `isExported`.
 /// Returns the newly created function.
 static SILFunction *createEmptyJVP(
     ADContext &context, SILFunction *original, SILDifferentiableAttr *attr,
     bool isExported) {
   LLVM_DEBUG(getADDebugStream()
              << "Creating JVP:\n\t"
              << "Original type: " << original->getLoweredFunctionType()
              << "\n\t");

   auto &module = context.getModule();
   auto origFnTy = original->getLoweredFunctionType();
   auto indices = attr->getIndices();

   // === Create an empty JVP. ===
   Mangle::ASTMangler mangler;
   auto mangledName = mangler.mangleAutoDiffDerivativeFunctionHelper(
       original->getName(), AutoDiffDerivativeFunctionKind::JVP, indices);
   auto jvpName =
       original->getASTContext().getIdentifier(mangledName).str();
   auto jvpGenericSig = getDerivativeGenericSignature(attr, original);

   // RAII that pushes the derivative generic signature to `module.Types` so
   // that calls to `module.Types.getTypeLowering()` below will know the JVP's
   // generic parameter types.
   Lowering::GenericContextScope genericContextScope(
       module.Types, jvpGenericSig);

   GenericEnvironment *jvpGenericEnv = nullptr;
   if (jvpGenericSig)
     jvpGenericEnv = jvpGenericSig->getGenericEnvironment();
   auto jvpType = origFnTy->getAutoDiffDerivativeFunctionType(
       indices.parameters, indices.source,
       AutoDiffDerivativeFunctionKind::JVP, module.Types,
       LookUpConformanceInModule(module.getSwiftModule()), jvpGenericSig);

   SILOptFunctionBuilder fb(context.getTransform());
   auto jvpLinkage = autodiff::getAutoDiffDerivativeFunctionLinkage(
       original->getLinkage(), isExported);
   auto *jvp = fb.createFunction(jvpLinkage, jvpName, jvpType, jvpGenericEnv,
                                 original->getLocation(), original->isBare(),
                                 IsNotTransparent, original->isSerialized(),
                                 original->isDynamicallyReplaceable());
   jvp->setDebugScope(new (module) SILDebugScope(original->getLocation(), jvp));
   attr->setJVPName(jvpName);

   LLVM_DEBUG(llvm::dbgs() << "JVP type: " << jvp->getLoweredFunctionType()
              << "\n");
   return jvp;
 }

 /// Resolves the JVP and VJP of `original` for the given `[differentiable]`
 /// attribute, looking up or declaring existing derivative functions where a
 /// name is known and synthesizing the missing ones otherwise.
 ///
 /// Returns true on error.
 bool ADContext::processDifferentiableAttribute(
     SILFunction *original, SILDifferentiableAttr *attr,
     DifferentiationInvoker invoker) {
   auto &module = getModule();
   const bool forwardModeEnabled =
       getASTContext().LangOpts.EnableExperimentalForwardModeDifferentiation;

   // Mangles the name of the `kind` derivative of `original` with respect to
   // the attribute's indices. The returned string is an identifier obtained
   // from the AST context.
   auto mangledDerivativeName =
       [&](AutoDiffDerivativeFunctionKind kind) -> StringRef {
     Mangle::ASTMangler mangler;
     auto mangled = mangler.mangleAutoDiffDerivativeFunctionHelper(
         original->getName(), kind, attr->getIndices());
     return original->getASTContext().getIdentifier(mangled).str();
   };
   // Looks up the function called `name` in the module; if there is none,
   // declares an external derivative function of the given kind instead.
   auto findOrDeclareDerivative =
       [&](StringRef name,
           AutoDiffDerivativeFunctionKind kind) -> SILFunction * {
     if (auto *existing = module.lookUpFunction(name))
       return existing;
     return declareExternalDerivativeFunction(original, attr, name, kind);
   };

   // A JVP is looked up only when the attribute names one or when the original
   // function is an external declaration (in which case the name is mangled).
   StringRef jvpName;
   if (attr->hasJVP())
     jvpName = attr->getJVPName();
   else if (original->isExternalDeclaration())
     jvpName = mangledDerivativeName(AutoDiffDerivativeFunctionKind::JVP);
   SILFunction *jvp = nullptr;
   if (!jvpName.empty()) {
     jvp = findOrDeclareDerivative(jvpName,
                                   AutoDiffDerivativeFunctionKind::JVP);
     attr->setJVPName(jvpName);
   }

   // When differentiation is triggered by `[differentiable]`, the derivative
   // function should share the linkage of the original function.
   const bool isDerivativeFnExported =
       invoker.getKind() ==
           DifferentiationInvoker::Kind::SILDifferentiableAttribute;

   // Same lookup rules for the VJP.
   StringRef vjpName;
   if (attr->hasVJP())
     vjpName = attr->getVJPName();
   else if (original->isExternalDeclaration())
     vjpName = mangledDerivativeName(AutoDiffDerivativeFunctionKind::VJP);
   SILFunction *vjp = nullptr;
   if (!vjpName.empty()) {
     vjp = findOrDeclareDerivative(vjpName,
                                   AutoDiffDerivativeFunctionKind::VJP);
     attr->setVJPName(vjpName);
   }

   // Synthesize the JVP if none was found.
   if (!jvp) {
     // With forward mode enabled, reject functions with no return and
     // functions with unsupported control flow up front.
     if (forwardModeEnabled &&
         (diagnoseNoReturn(*this, original, invoker) ||
          diagnoseUnsupportedControlFlow(*this, original, invoker)))
       return true;

     jvp = createEmptyJVP(*this, original, attr, isDerivativeFnExported);
     getGeneratedFunctions().push_back(jvp);

     // A real JVP body is generated only when forward mode is enabled and no
     // custom VJP exists: a generated JVP may not match the semantics of a
     // custom VJP. In every other case the JVP body just traps.
     if (forwardModeEnabled && !vjp) {
       // JVP and differential generation do not currently support functions
       // with multiple basic blocks.
       if (original->getBlocks().size() > 1) {
         emitNondifferentiabilityError(
             original->getLocation().getSourceLoc(), invoker,
             diag::autodiff_jvp_control_flow_not_supported);
         return true;
       }

       JVPEmitter jvpEmitter(*this, original, attr, jvp, invoker);
       if (jvpEmitter.run())
         return true;
     } else {
       LLVM_DEBUG(getADDebugStream()
                  << "Generating empty JVP for original @"
                  << original->getName() << '\n');
       // Emit a body that destroys its owned arguments and then calls a
       // fatal-error function.
       auto *entryBB = jvp->createBasicBlock();
       createEntryArguments(jvp);
       SILBuilder bodyBuilder(entryBB);
       auto jvpLoc = jvp->getLocation();

       // Destroy all owned arguments.
       for (auto *entryArg : entryBB->getArguments()) {
         if (entryArg->getOwnershipKind() == ValueOwnershipKind::Owned)
           bodyBuilder.emitDestroyOperation(jvpLoc, entryArg);
       }

       // Fatal error in case this JVP is called by the user: reference a thin
       // `() -> Never` function and apply it.
       auto neverResult = SILResultInfo(
           module.getASTContext().getNeverType(), ResultConvention::Unowned);
       auto fatalErrorFnType = SILFunctionType::get(
           /*genericSig*/ nullptr,
           SILFunctionType::ExtInfo().withRepresentation(
               SILFunctionTypeRepresentation::Thin),
           SILCoroutineKind::None, ParameterConvention::Direct_Unowned, {},
           /*interfaceYields*/ {}, neverResult,
           /*interfaceErrorResults*/ None, getASTContext());
       SILOptFunctionBuilder fatalErrorFnBuilder(getTransform());
       auto *fatalErrorFn = fatalErrorFnBuilder.getOrCreateFunction(
           jvpLoc, "_printJVPErrorAndExit", SILLinkage::PublicExternal,
           fatalErrorFnType, IsNotBare, IsNotTransparent, IsNotSerialized,
           IsNotDynamic, ProfileCounter(), IsNotThunk);
       auto *fatalErrorFnRef =
           bodyBuilder.createFunctionRef(jvpLoc, fatalErrorFn);
       bodyBuilder.createApply(jvpLoc, fatalErrorFnRef, SubstitutionMap(), {});
       bodyBuilder.createUnreachable(jvpLoc);
       LLVM_DEBUG(getADDebugStream() << "Generated empty JVP for "
                  << original->getName() << ":\n" << *jvp);
     }
   }

   // Nothing left to do if a VJP was found.
   if (vjp)
     return false;

   // Otherwise synthesize the VJP. Reject functions with no return and
   // functions with unsupported control flow first.
   if (diagnoseNoReturn(*this, original, invoker) ||
       diagnoseUnsupportedControlFlow(*this, original, invoker))
     return true;

   vjp = createEmptyVJP(*this, original, attr, isDerivativeFnExported);
   getGeneratedFunctions().push_back(vjp);
   VJPEmitter vjpEmitter(*this, original, attr, vjp, invoker);
   return vjpEmitter.run();
 }

 //===----------------------------------------------------------------------===//
 // Differentiation pass implementation
 //===----------------------------------------------------------------------===//

 /// The automatic differentiation pass.
 namespace {
 /// Module-level SIL transform; the pass body lives in `run()`, which is
 /// defined out of line.
 class Differentiation : public SILModuleTransform {
 public:
   Differentiation() = default;

   void run() override;
 };
 } // end anonymous namespace

 /// Gets or creates a thunk that adapts a linear map of type `linearMapType`
 /// (a differential when `kind` is JVP, a pullback when `kind` is VJP), taken
 /// with respect to `actualIndices`, to `targetType`, which is with respect to
 /// `desiredIndices`.
 ///
 /// The thunk is a shared function looked up by a mangled name; if it already
 /// has a body it is returned unchanged. Otherwise a body is emitted that
 /// applies the linear map (passed as the thunk's last argument):
 /// - Differential: parameters in `actualIndices` but not in `desiredIndices`
 ///   are filled with zeros.
 /// - Pullback: results for parameters not in `desiredIndices` are destroyed
 ///   and only the desired direct results are returned.
 ///
 /// Returns the thunk together with the interface substitution map produced by
 /// `buildThunkType`.
 std::pair<SILFunction *, SubstitutionMap>
 ADContext::getOrCreateSubsetParametersThunkForLinearMap(
     SILFunction *parentThunk, CanSILFunctionType linearMapType,
     CanSILFunctionType targetType, AutoDiffDerivativeFunctionKind kind,
     SILAutoDiffIndices desiredIndices, SILAutoDiffIndices actualIndices) {
   LLVM_DEBUG(getADDebugStream()
              << "Getting a subset parameters thunk for " << linearMapType
              << " from " << actualIndices << " to " << desiredIndices << '\n');

   // `genericEnv` and `interfaceSubs` are passed to `buildThunkType` and used
   // below for the thunk's generic environment and the returned substitutions.
   SubstitutionMap interfaceSubs;
   GenericEnvironment *genericEnv = nullptr;
   auto thunkType = buildThunkType(
       parentThunk, linearMapType, targetType, genericEnv, interfaceSubs,
       /*withoutActuallyEscaping*/ true,
       DifferentiationThunkKind::Reabstraction);

   // TODO(TF-685): Use more principled mangling for thunks.
   // `thunkName` first holds only the linear map kind; it is folded into the
   // full thunk name below.
   std::string thunkName;
   switch (kind) {
     case AutoDiffDerivativeFunctionKind::JVP:
       thunkName = "differential";
       break;
     case AutoDiffDerivativeFunctionKind::VJP:
       thunkName = "pullback";
   }
   Mangle::ASTMangler mangler;
   auto fromInterfaceType =
       linearMapType->mapTypeOutOfContext()->getCanonicalType();
   auto toInterfaceType = targetType->mapTypeOutOfContext()->getCanonicalType();
   CanType dynamicSelfType;
   // Full name: "AD__" + reabstraction thunk mangling + "_" + desired indices
   // + "_" + kind + "_index_subset_thunk".
   thunkName = "AD__" + mangler.mangleReabstractionThunkHelper(
       thunkType, fromInterfaceType, toInterfaceType, dynamicSelfType,
       module.getSwiftModule()) + "_" + desiredIndices.mangle() + "_" +
       thunkName;
   thunkName += "_index_subset_thunk";

   auto loc = parentThunk->getLocation();
   SILOptFunctionBuilder fb(getTransform());
   auto *thunk = fb.getOrCreateSharedFunction(
       loc, thunkName, thunkType, IsBare, IsTransparent, IsSerialized,
       ProfileCounter(), IsThunk, IsNotDynamic);

   // A thunk with this name already has a body: reuse it.
   if (!thunk->empty())
     return {thunk, interfaceSubs};

   thunk->setGenericEnvironment(genericEnv);
   thunk->setOwnershipEliminated();
   auto *entry = thunk->createBasicBlock();
   SILBuilder builder(entry);
   createEntryArguments(thunk);

   // Get arguments.
   // `arguments` collects the operands of the linear map `apply`;
   // `localAllocations` collects stack buffers that must be deallocated (in
   // reverse order) before returning.
   SmallVector<SILValue, 4> arguments;
   SmallVector<AllocStackInst *, 4> localAllocations;

   // Build a `.zero` argument for the given `Differentiable`-conforming type.
   // Only the `Vector` tangent space kind is implemented; the zero is written
   // into a fresh stack buffer and either the buffer (address type) or a value
   // loaded from it (object type) is appended to `arguments`.
   auto buildZeroArgument = [&](SILType zeroSILType) {
     auto zeroSILObjType = zeroSILType.getObjectType();
     auto zeroType = zeroSILType.getASTType();
     auto *swiftMod = getModule().getSwiftModule();
     auto tangentSpace = zeroType->getAutoDiffAssociatedTangentSpace(
       LookUpConformanceInModule(swiftMod));
     assert(tangentSpace && "No tangent space for this type");
     switch (tangentSpace->getKind()) {
     case VectorSpace::Kind::Vector: {
       auto *buf = builder.createAllocStack(loc, zeroSILObjType);
       localAllocations.push_back(buf);
       emitZeroIntoBuffer(builder, zeroType, buf, loc);
       if (zeroSILType.isAddress())
         arguments.push_back(buf);
       else {
         auto *arg = builder.createLoad(loc, buf,
                                        LoadOwnershipQualifier::Unqualified);
         arguments.push_back(arg);
       }
       break;
     }
     case VectorSpace::Kind::Tuple: {
       llvm_unreachable(
           "Unimplemented: Handle zero initialization for tuples");
     }
     case VectorSpace::Kind::Function:
       llvm_unreachable(
           "Unimplemented: Emit thunks for abstracting zero initialization");
     }
   };

   // `actualIndices` and `desiredIndices` are with respect to the original
   // function. However, the differential parameters and pullback results may
   // already be w.r.t. a subset. We create a map between the original function's
   // actual parameter indices and the linear map's actual indices.
   // Example:
   //   Original: (T0, T1, T2) -> R
   //   Actual indices: 0, 2
   //   Original differential: (T0, T2) -> R
   //   Original pullback: R -> (T0, T2)
   //   Desired indices w.r.t. original: 2
   //   Desired indices w.r.t. linear map: 1
   // Entries for original parameters that are not in `actualIndices` keep the
   // `UINT_MAX` sentinel.
   SmallVector<unsigned, 4> actualParamIndicesMap(
       actualIndices.parameters->getCapacity(), UINT_MAX);
   {
     unsigned indexInBitVec = 0;
     for (auto index : actualIndices.parameters->getIndices()) {
       actualParamIndicesMap[index] = indexInBitVec;
       indexInBitVec++;
     }
   }
   // Maps an original parameter index to its position in the linear map. The
   // assertion rejects the `UINT_MAX` sentinel, i.e. indices that are not in
   // `actualIndices`.
   auto mapOriginalParameterIndex = [&](unsigned index) -> unsigned {
     auto mappedIndex = actualParamIndicesMap[index];
     assert(mappedIndex < actualIndices.parameters->getCapacity());
     return mappedIndex;
   };

   switch (kind) {
   // Differential arguments are:
   // - All indirect results, followed by:
   // - An interleaving of:
   //   - Thunk arguments (when parameter index is in both desired and actual
   //     indices).
   //   - Zeros (when parameter is not in desired indices).
   case AutoDiffDerivativeFunctionKind::JVP: {
     // Forward all indirect results.
     arguments.append(thunk->getIndirectResults().begin(),
                      thunk->getIndirectResults().end());
     auto toArgIter = thunk->getArgumentsWithoutIndirectResults().begin();
     auto useNextArgument = [&]() {
       arguments.push_back(*toArgIter++);
     };
     // Iterate over actual indices.
     for (unsigned i : actualIndices.parameters->getIndices()) {
       // If index is desired, use next argument.
       if (desiredIndices.isWrtParameter(i)) {
         useNextArgument();
       }
       // Otherwise, construct and use a zero argument.
       else {
         auto zeroSILType =
             linearMapType->getParameters()[mapOriginalParameterIndex(i)]
                 .getSILStorageType();
         buildZeroArgument(zeroSILType);
       }
     }
     break;
   }
   // Pullback arguments are:
   // - An interleaving of:
   //   - Thunk indirect results (when parameter index is in both desired and
   //     actual indices).
   //   - Zeros (when parameter is not in desired indices).
   // - All actual arguments.
   case AutoDiffDerivativeFunctionKind::VJP: {
     auto toIndirectResultsIter = thunk->getIndirectResults().begin();
     auto useNextResult = [&]() {
       arguments.push_back(*toIndirectResultsIter++);
     };
     // Iterate over actual indices.
     for (unsigned i : actualIndices.parameters->getIndices()) {
       auto resultInfo =
           linearMapType->getResults()[mapOriginalParameterIndex(i)];
       // Skip direct results. Only indirect results are relevant as arguments.
       if (resultInfo.isFormalDirect())
         continue;
       // If index is desired, use next indirect result.
       if (desiredIndices.isWrtParameter(i)) {
         useNextResult();
         continue;
       }
       // Otherwise, construct and use an uninitialized indirect result.
       auto *indirectResult =
           builder.createAllocStack(loc, resultInfo.getSILStorageType());
       localAllocations.push_back(indirectResult);
       arguments.push_back(indirectResult);
     }
     // Forward all actual non-indirect-result arguments. The last thunk
     // argument is excluded: it is the linear map, which is used as the callee
     // below.
     arguments.append(thunk->getArgumentsWithoutIndirectResults().begin(),
                      thunk->getArgumentsWithoutIndirectResults().end() - 1);
     break;
   }
   }

   // Get the linear map thunk argument and apply it.
   auto *linearMap = thunk->getArguments().back();
   auto *ai = builder.createApply(
       loc, linearMap, SubstitutionMap(), arguments, /*isNonThrowing*/ false);

   // If differential thunk, deallocate local allocations and directly return
   // `apply` result.
   // NOTE(review): unlike the pullback path at the end of this function, this
   // path returns without adding `thunk` to `getGeneratedFunctions()` --
   // confirm whether that is intended.
   if (kind == AutoDiffDerivativeFunctionKind::JVP) {
     for (auto *alloc : reversed(localAllocations))
       builder.createDeallocStack(loc, alloc);
     builder.createReturn(loc, ai);
     return {thunk, interfaceSubs};
   }

   // If pullback thunk, return only the desired results and clean up the
   // undesired results.
   SmallVector<SILValue, 8> pullbackDirectResults;
   extractAllElements(ai, builder, pullbackDirectResults);
   SmallVector<SILValue, 8> allResults;
   collectAllActualResultsInTypeOrder(ai, pullbackDirectResults, allResults);

   SmallVector<SILValue, 8> results;
   for (unsigned i : actualIndices.parameters->getIndices()) {
     // If result is desired:
     // - Do nothing if result is indirect.
     //   (It was already forwarded to the `apply` instruction).
     // - Push it to `results` if result is direct.
     auto result = allResults[mapOriginalParameterIndex(i)];
     if (desiredIndices.isWrtParameter(i)) {
       if (result->getType().isObject())
         results.push_back(result);
     }
     // Otherwise, cleanup the unused results.
     else {
       if (result->getType().isAddress())
         builder.emitDestroyAddrAndFold(loc, result);
       else
         builder.emitDestroyValueOperation(loc, result);
     }
   }
   // Deallocate local allocations and return final direct result.
   for (auto *alloc : reversed(localAllocations))
     builder.createDeallocStack(loc, alloc);
   auto result = joinElements(results, builder, loc);
   builder.createReturn(loc, result);

   getGeneratedFunctions().push_back(thunk);
   return {thunk, interfaceSubs};
 }

 /// Gets or creates a thunk that adapts the derivative function `derivativeFn`
 /// (a JVP or VJP of `origFnOperand`, per `kind`), which is with respect to
 /// `actualIndices`, to the derivative function type with respect to
 /// `desiredIndices`.
 ///
 /// The thunk is a shared function looked up by a mangled name; if it already
 /// has a body it is returned unchanged. Otherwise a body is emitted that
 /// forwards all thunk arguments to the derivative function, wraps the
 /// returned linear map in a subset parameters linear map thunk (see
 /// `getOrCreateSubsetParametersThunkForLinearMap`), and returns the original
 /// direct result (if any) together with the thunked linear map.
 ///
 /// Returns the thunk together with the interface substitution map produced by
 /// `buildThunkType`.
 std::pair<SILFunction *, SubstitutionMap>
 ADContext::getOrCreateSubsetParametersThunkForDerivativeFunction(
     SILValue origFnOperand, SILValue derivativeFn,
     AutoDiffDerivativeFunctionKind kind, SILAutoDiffIndices desiredIndices,
     SILAutoDiffIndices actualIndices) {
   LLVM_DEBUG(getADDebugStream()
              << "Getting a subset parameters thunk for derivative function "
              << derivativeFn << " of the original function " << origFnOperand
              << " from " << actualIndices << " to " << desiredIndices << '\n');

   auto origFnType = origFnOperand->getType().castTo<SILFunctionType>();
   auto &module = getModule();
   auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());

   // Compute target type for thunking.
   auto derivativeFnType = derivativeFn->getType().castTo<SILFunctionType>();
   auto targetType = origFnType->getAutoDiffDerivativeFunctionType(
       desiredIndices.parameters, desiredIndices.source, kind, module.Types,
       lookupConformance);
   auto *caller = derivativeFn->getFunction();
   // If the target type mentions archetypes, remap it into the context of the
   // function containing `derivativeFn`.
   if (targetType->hasArchetype()) {
     auto substTargetType = caller->mapTypeIntoContext(
         targetType->mapTypeOutOfContext())->getCanonicalType();
     targetType = SILType::getPrimitiveObjectType(substTargetType)
         .castTo<SILFunctionType>();
   }
   // The thunk changes only the returned linear map, so the parameter and
   // result counts of the two derivative function types must agree.
   assert(derivativeFnType->getNumParameters() == targetType->getNumParameters());
   assert(derivativeFnType->getNumResults() == targetType->getNumResults());

   // Build thunk type.
   SubstitutionMap interfaceSubs;
   GenericEnvironment *genericEnv = nullptr;
   auto thunkType = buildThunkType(
       derivativeFn->getFunction(), derivativeFnType, targetType, genericEnv,
       interfaceSubs, /*withoutActuallyEscaping*/ false,
       DifferentiationThunkKind::IndexSubset);

   // Resolve the original function's name (used in the thunk name below) by
   // looking through function conversions for a `function_ref` or a method
   // instruction.
   // FIXME: The logic for resolving `assocRef` does not reapply function
   // conversions, which is problematic if `derivativeFn` is a `partial_apply`
   // instruction.
   StringRef origName;
   if (auto *origFnRef =
           peerThroughFunctionConversions<FunctionRefInst>(origFnOperand)) {
     origName = origFnRef->getInitiallyReferencedFunction()->getName();
   } else if (auto *origMethodInst =
                  peerThroughFunctionConversions<MethodInst>(origFnOperand)) {
     origName = origMethodInst->getMember().getAnyFunctionRef()
         ->getAbstractFunctionDecl()->getNameStr();
   }
   assert(!origName.empty() && "Original function name could not be resolved");
   // TODO(TF-685): Use more principled mangling for thunks.
   // `thunkName` first holds only the derivative kind; it is folded into the
   // full thunk name below.
   std::string thunkName;
   switch (kind) {
     case AutoDiffDerivativeFunctionKind::JVP:
       thunkName = "jvp";
       break;
     case AutoDiffDerivativeFunctionKind::VJP:
       thunkName = "vjp";
   }
   Mangle::ASTMangler mangler;
   auto fromInterfaceType =
       derivativeFnType->mapTypeOutOfContext()->getCanonicalType();
   auto toInterfaceType = targetType->mapTypeOutOfContext()->getCanonicalType();
   CanType dynamicSelfType;
   // Full name: "AD__orig_" + original name + "_" + reabstraction thunk
   // mangling + "_" + desired indices + "_" + kind +
   // "_subset_parameters_thunk".
   thunkName = "AD__orig_" + origName.str() + "_" +
       mangler.mangleReabstractionThunkHelper(
           thunkType, fromInterfaceType, toInterfaceType, dynamicSelfType,
           module.getSwiftModule()) + "_" + desiredIndices.mangle() + "_" +
           thunkName;
   thunkName += "_subset_parameters_thunk";

   auto loc = origFnOperand.getLoc();
   SILOptFunctionBuilder fb(getTransform());
   auto *thunk = fb.getOrCreateSharedFunction(
       loc, thunkName, thunkType, IsBare, IsTransparent, caller->isSerialized(),
       ProfileCounter(), IsThunk, IsNotDynamic);

   // A thunk with this name already has a body: reuse it.
   if (!thunk->empty())
     return {thunk, interfaceSubs};

   thunk->setOwnershipEliminated();
   thunk->setGenericEnvironment(genericEnv);
   auto *entry = thunk->createBasicBlock();
   SILBuilder builder(entry);
   createEntryArguments(thunk);

   // Substitutions for applying the derivative function inside the thunk. They
   // stay empty unless `derivativeFn` is itself a `partial_apply`.
   SubstitutionMap assocSubstMap;
   if (auto *partialApply = dyn_cast<PartialApplyInst>(derivativeFn))
     assocSubstMap = partialApply->getSubstitutionMap();

   // Re-materialize a reference to the derivative function inside the thunk,
   // depending on whether it is a `function_ref`, `witness_method`, or
   // `class_method` (looking through function conversions).
   // FIXME: The logic for resolving `assocRef` does not reapply function
   // conversions, which is problematic if `derivativeFn` is a `partial_apply`
   // instruction.
   SILValue assocRef;
   if (auto *derivativeFnRef =
           peerThroughFunctionConversions<FunctionRefInst>(derivativeFn)) {
     auto *assoc = derivativeFnRef->getReferencedFunctionOrNull();
     assocRef = builder.createFunctionRef(loc, assoc);
   } else if (auto *assocMethodInst =
                  peerThroughFunctionConversions<WitnessMethodInst>(derivativeFn)) {
     assocRef = builder.createWitnessMethod(
         loc, assocMethodInst->getLookupType(),
         assocMethodInst->getConformance(), assocMethodInst->getMember(),
         thunk->mapTypeIntoContext(assocMethodInst->getType()));
   } else if (auto *assocMethodInst =
                  peerThroughFunctionConversions<ClassMethodInst>(derivativeFn)) {
     // The class operand for the method lookup is taken from the thunk's last
     // argument; the assertion checks that its type matches the operand of the
     // original `class_method`.
     auto classOperand = thunk->getArgumentsWithoutIndirectResults().back();
     auto classOperandType = assocMethodInst->getOperand()->getType();
     assert(classOperand->getType() == classOperandType);
     assocRef = builder.createClassMethod(
         loc, classOperand, assocMethodInst->getMember(),
         thunk->mapTypeIntoContext(assocMethodInst->getType()));
   }
   assert(assocRef && "Expected derivative function to be resolved");

   assocSubstMap = assocSubstMap.subst(thunk->getForwardingSubstitutionMap());
   derivativeFnType = assocRef->getType().castTo<SILFunctionType>();

   // Forward every thunk argument to the derivative function unchanged.
   SmallVector<SILValue, 4> arguments;
   arguments.append(thunk->getArguments().begin(), thunk->getArguments().end());
   assert(arguments.size() == derivativeFnType->getNumParameters() +
                                  derivativeFnType->getNumIndirectFormalResults());
   auto *apply = builder.createApply(
       loc, assocRef, assocSubstMap, arguments, /*isNonThrowing*/ false);

   // Extract all direct results.
   // The last direct result is the linear map; everything before it makes up
   // the original direct result.
   SmallVector<SILValue, 8> directResults;
   extractAllElements(apply, builder, directResults);
   auto originalDirectResults = ArrayRef<SILValue>(directResults).drop_back(1);
   auto originalDirectResult =
       joinElements(originalDirectResults, builder, apply->getLoc());
   auto linearMap = directResults.back();

   auto linearMapType = linearMap->getType().castTo<SILFunctionType>();
   auto linearMapTargetType = targetType->getResults().back().getSILStorageType()
       .castTo<SILFunctionType>();

   // Thunk the returned linear map so that it is with respect to
   // `desiredIndices`, then partially apply the thunk to the linear map.
   SILFunction *linearMapThunk;
   SubstitutionMap linearMapSubs;
   std::tie(linearMapThunk, linearMapSubs) =
       getOrCreateSubsetParametersThunkForLinearMap(
           thunk, linearMapType, linearMapTargetType, kind,
           desiredIndices, actualIndices);

   auto *linearMapThunkFRI = builder.createFunctionRef(loc, linearMapThunk);
   auto *thunkedLinearMap = builder.createPartialApply(
       loc, linearMapThunkFRI, linearMapSubs, {linearMap},
       ParameterConvention::Direct_Guaranteed);

   // Only original functions with exactly one result are handled. With a
   // direct original result, return it joined with the thunked linear map;
   // otherwise return just the thunked linear map.
   assert(origFnType->getResults().size() == 1);
   if (origFnType->getResults().front().isFormalDirect()) {
     auto result = joinElements(
         {originalDirectResult, thunkedLinearMap}, builder, loc);
     builder.createReturn(loc, result);
   } else {
     builder.createReturn(loc, thunkedLinearMap);
   }

   getGeneratedFunctions().push_back(thunk);
   return {thunk, interfaceSubs};
 }

 /// Emits, using `builder` at `loc`, a value to stand in for the given
 /// `differentiable_function` instruction `dfi`, with its derivative functions
 /// filled in.
 ///
 /// Two cases are handled:
 /// - The original function is an `apply` of a `function_ref` (treated here as
 ///   a curry thunk application) whose result is a function: a new curry thunk
 ///   returning a `@differentiable` function is created (or reused) and
 ///   applied, and the `apply` result is returned.
 /// - Otherwise: JVP and VJP references are emitted for the original function
 ///   (wrapped in subset parameters thunks when the available indices differ
 ///   from the desired ones) and a new `differentiable_function` instruction
 ///   is created from them.
 ///
 /// Returns the new value, or a null value on error.
 SILValue ADContext::promoteToDifferentiableFunction(
     DifferentiableFunctionInst *dfi, SILBuilder &builder, SILLocation loc,
     DifferentiationInvoker invoker) {
   auto origFnOperand = dfi->getOriginalFunction();
   auto origFnTy = origFnOperand->getType().castTo<SILFunctionType>();
   auto parameterIndices = dfi->getParameterIndices();
   unsigned resultIndex = resultIndices[dfi];

   // Handle curry thunk applications specially.
   if (auto *ai = dyn_cast<ApplyInst>(origFnOperand)) {
     if (auto *thunkRef = dyn_cast<FunctionRefInst>(ai->getCallee())) {
       // Create a new curry thunk.
       SILAutoDiffIndices desiredIndices(resultIndex, parameterIndices);
       auto *thunk = thunkRef->getReferencedFunctionOrNull();
       // TODO(TF-685): Use more principled mangling for thunks.
       auto newThunkName = "AD__" + thunk->getName().str() +
           "__differentiable_curry_thunk_" + desiredIndices.mangle();

       auto thunkTy = thunk->getLoweredFunctionType();
       auto thunkResult = thunkTy->getSingleResult();
       // Only callees whose single result is a function are handled here;
       // anything else falls through to the general path below.
       if (auto resultFnTy = thunkResult.getType()->getAs<SILFunctionType>()) {
         // Construct new curry thunk type with `@differentiable` result.
         auto diffableResultFnTy = resultFnTy->getWithExtInfo(
             resultFnTy->getExtInfo()
                 .withDifferentiabilityKind(DifferentiabilityKind::Normal));
         auto newThunkResult = thunkResult.getWithType(diffableResultFnTy);
         auto thunkType = SILFunctionType::get(
             thunkTy->getGenericSignature(), thunkTy->getExtInfo(),
             thunkTy->getCoroutineKind(), thunkTy->getCalleeConvention(),
             thunkTy->getParameters(), {}, {newThunkResult}, {},
             thunkTy->getASTContext());

         // Construct new curry thunk, returning a `@differentiable` function.
         SILOptFunctionBuilder fb(transform);
         auto *newThunk = fb.getOrCreateFunction(
             loc, newThunkName,
             getSpecializedLinkage(thunk, thunk->getLinkage()), thunkType,
             thunk->isBare(), thunk->isTransparent(), thunk->isSerialized(),
             thunk->isDynamicallyReplaceable(), ProfileCounter(),
             thunk->isThunk());
         // If new thunk is newly created: clone the old thunk body, wrap the
         // returned function value with an `differentiable_function`
         // instruction, and process the `differentiable_function` instruction.
         if (newThunk->empty()) {
           if (auto newThunkGenSig = thunkType->getGenericSignature())
             newThunk->setGenericEnvironment(
                 newThunkGenSig->getGenericEnvironment());
           newThunk->setOwnershipEliminated();
           BasicTypeSubstCloner cloner(thunk, newThunk);
           cloner.run();
           auto *retInst =
               cast<ReturnInst>(newThunk->findReturnBB()->getTerminator());
           SILBuilder thunkBuilder(retInst);
           // Note: this local `dfi` shadows the `dfi` parameter for the rest
           // of this scope; it is the instruction created inside the new
           // thunk.
           auto *dfi = createDifferentiableFunction(thunkBuilder, loc,
                                                    parameterIndices,
                                                    retInst->getOperand());
           resultIndices[dfi] = resultIndex;
           // Replace the cloned `return` with one returning the new
           // `differentiable_function` value.
           thunkBuilder.createReturn(loc, dfi);
           retInst->eraseFromParent();

           getGeneratedFunctions().push_back(newThunk);
           getDifferentiableFunctionInsts().push_back(dfi);
           if (processDifferentiableFunctionInst(dfi))
             return nullptr;
         }

         // Apply the new curry thunk.
         auto *newThunkRef = builder.createFunctionRef(loc, newThunk);
         getGeneratedFunctionReferences().push_back(newThunkRef);
         SmallVector<SILValue, 8> newArgs;
         SmallVector<SILValue, 8> newArgsToDestroy;
         SmallVector<AllocStackInst *, 1> newBuffersToDealloc;
         copyParameterArgumentsForApply(ai, newArgs, newArgsToDestroy,
                                        newBuffersToDealloc);
         auto *newApply = builder.createApply(
             ai->getLoc(), newThunkRef, ai->getSubstitutionMap(), newArgs,
             ai->isNonThrowing());
         // Clean up the argument copies and temporary buffers produced by
         // `copyParameterArgumentsForApply`.
         for (auto arg : newArgsToDestroy) {
           if (arg->getType().isObject())
             builder.emitDestroyValueOperation(loc, arg);
           else
             builder.emitDestroyAddr(loc, arg);
         }
         for (auto *alloc : newBuffersToDealloc)
           builder.createDeallocStack(loc, alloc);
         return newApply;
       }
     }
   }

   // General path: emit a JVP and a VJP reference, in that order.
   SILAutoDiffIndices desiredIndices(resultIndex, parameterIndices);
   SmallVector<SILValue, 2> derivativeFns;
   SmallVector<AllocStackInst *, 2> newBuffersToDealloc;
   for (auto derivativeFnKind : {AutoDiffDerivativeFunctionKind::JVP,
                            AutoDiffDerivativeFunctionKind::VJP}) {
     auto derivativeFnAndIndices = emitDerivativeFunctionReference(
         *this, builder, desiredIndices, derivativeFnKind, origFnOperand, invoker,
         newBuffersToDealloc);
     // Show an error at the operator, highlight the argument, and show a note
     // at the definition site of the argument.
     if (!derivativeFnAndIndices)
       return nullptr;

     auto derivativeFn = derivativeFnAndIndices->first;
     getGeneratedFunctionReferences().push_back(derivativeFn);

     // If desired indices are a subset of actual indices, create a "subset
     // indices thunk" and destroy the emitted derivative function reference.
     // - For JVPs: the thunked JVP returns a differential taking fewer
     //   parameters (using `.zero` for the dropped parameters).
     // - For VJPs: the thunked VJP returns a pullback that drops the unused
     //   tangent values.
     auto actualIndices = derivativeFnAndIndices->second;
     // NOTE: `desiredIndices` may come from a partially-applied function and
     // have smaller capacity than `actualIndices`. We expect this logic to go
     // away when we support `@differentiable` partial apply.
     // if (actualIndices != desiredIndices) { // TODO: Re-enable.
     auto extendedDesiredIndices = desiredIndices.parameters->extendingCapacity(
         getASTContext(), actualIndices.parameters->getCapacity());
     if (actualIndices.source != desiredIndices.source ||
         !actualIndices.parameters->equals(extendedDesiredIndices)) {
       // Destroy the already emitted derivative function reference because it
       // is no longer used.
       builder.emitDestroyValueOperation(loc, derivativeFn);
       // Check if underlying original function reference has been partially
       // applied with arguments. If so, produce an error: parameter subset
       // thunks do not yet support this case because partially applied arguments
       // cannot be propagated to parameter subset thunks.
       auto didPartiallyApplyArguments = [](SILValue original) {
         while (auto *pai =
                    peerThroughFunctionConversions<PartialApplyInst>(original)) {
           if (pai->getNumArguments() > 0)
             return true;
           original = pai->getCallee();
         }
         return false;
       };
       if (didPartiallyApplyArguments(origFnOperand)) {
         emitNondifferentiabilityError(
             origFnOperand, invoker,
             diag::autodiff_cannot_param_subset_thunk_partially_applied_orig_fn);
         return nullptr;
       }
       // Create the parameter subset thunk.
       assert(actualIndices.parameters->isSupersetOf(extendedDesiredIndices));
       SILFunction *thunk;
       SubstitutionMap interfaceSubs;
       std::tie(thunk, interfaceSubs) =
           getOrCreateSubsetParametersThunkForDerivativeFunction(
               origFnOperand, derivativeFn, derivativeFnKind, desiredIndices,
               actualIndices);
       auto *thunkFRI = builder.createFunctionRef(loc, thunk);
       // A generic thunk is specialized via an argument-less `partial_apply`
       // carrying `interfaceSubs`; a non-generic thunk is referenced directly.
       if (auto genSig =
               thunk->getLoweredFunctionType()->getGenericSignature()) {
         derivativeFn = builder.createPartialApply(
             loc, thunkFRI, interfaceSubs, {},
             ParameterConvention::Direct_Guaranteed);
       } else {
         derivativeFn = thunkFRI;
       }
     }
     auto expectedDerivativeFnTy = origFnTy->getAutoDiffDerivativeFunctionType(
         parameterIndices, resultIndex, derivativeFnKind, getTypeConverter(),
         LookUpConformanceInModule(getModule().getSwiftModule()));
     // If `derivativeFn` is `@convention(thin)` but is expected to be
     // `@convention(thick)`, emit a `thin_to_thick` instruction.
     if (expectedDerivativeFnTy->getRepresentation()
             == SILFunctionTypeRepresentation::Thick &&
         derivativeFn->getType().castTo<SILFunctionType>()->getRepresentation()
             == SILFunctionTypeRepresentation::Thin) {
       derivativeFn = builder.createThinToThickFunction(
           loc, derivativeFn, SILType::getPrimitiveObjectType(expectedDerivativeFnTy));
     }

     derivativeFns.push_back(derivativeFn);
   }
   // Deallocate temporary buffers used for creating derivative functions.
   for (auto *buf : reversed(newBuffersToDealloc))
     builder.createDeallocStack(loc, buf);

   // Build the new instruction from a copy of the original function and the
   // JVP (`derivativeFns[0]`) / VJP (`derivativeFns[1]`) emitted above.
   auto origFnCopy = builder.emitCopyValueOperation(loc, origFnOperand);
   auto *newDFI = createDifferentiableFunction(
       builder, loc, parameterIndices, origFnCopy,
       std::make_pair(derivativeFns[0], derivativeFns[1]));
   // NOTE(review): the result index and worklist entry are recorded for the
   // incoming `dfi`, not for `newDFI` -- confirm that this is intended.
   resultIndices[dfi] = resultIndex;
   getDifferentiableFunctionInsts().push_back(dfi);

   return newDFI;
 }

 /// Fold `differentiable_function_extract` users of the given
 /// `differentiable_function` instruction, directly replacing them with
 /// `differentiable_function` instruction operands. If the
 /// `differentiable_function` instruction has no remaining uses, delete the
 /// instruction itself after folding.
 ///
 /// Folding can be disabled by the `SkipFoldingDifferentiableFunctionExtraction`
 /// flag for SIL testing purposes.
 ///
 /// `source` is always recorded in `processedDifferentiableFunctionInsts`,
 /// whether or not it was erased.
 // FIXME: This function is not correctly detecting the foldable pattern and
 // needs to be rewritten.
 void ADContext::foldDifferentiableFunctionExtraction(
     DifferentiableFunctionInst *source) {
   // Snapshot the `differentiable_function_extract` users before rewriting
   // anything. Erasing a user destroys its operand, which is a node of
   // `source`'s use list; erasing while iterating `source->getUses()` would
   // therefore advance the iterator through a destroyed operand. Users that
   // are not `differentiable_function_extract` instructions are left alone.
   SmallVector<DifferentiableFunctionExtractInst *, 4> extractInsts;
   for (auto use : source->getUses())
     if (auto *dfei =
             dyn_cast<DifferentiableFunctionExtractInst>(use->getUser()))
       extractInsts.push_back(dfei);

   for (auto *dfei : extractInsts) {
     // Fold original function extractors.
     if (dfei->getExtractee() ==
             NormalDifferentiableFunctionTypeComponent::Original) {
       auto originalFnValue = source->getOriginalFunction();
       dfei->replaceAllUsesWith(originalFnValue);
       dfei->eraseFromParent();
       continue;
     }
     // Fold derivative function extractors.
     auto derivativeFnValue =
         source->getDerivativeFunction(dfei->getDerivativeFunctionKind());
     dfei->replaceAllUsesWith(derivativeFnValue);
     dfei->eraseFromParent();
   }
   // If the `differentiable_function` instruction has no remaining uses, erase
   // it.
   if (isInstructionTriviallyDead(source)) {
     SILBuilder builder(source);
     // NOTE(review): the JVP/VJP operands are function values, and other code
     // in this file destroys function values with `emitDestroyValueOperation`.
     // Confirm that `emitDestroyAddrAndFold` is intended here.
     builder.emitDestroyAddrAndFold(source->getLoc(), source->getJVPFunction());
     builder.emitDestroyAddrAndFold(source->getLoc(), source->getVJPFunction());
     source->eraseFromParent();
   }
   // Mark `source` as processed so that it won't be reprocessed after deletion.
   processedDifferentiableFunctionInsts.insert(source);
 }

 /// Process a single `differentiable_function` instruction.
 ///
 /// Instructions that already have derivative functions are left untouched.
 /// Otherwise the instruction is promoted via
 /// `promoteToDifferentiableFunction`, every use of it is redirected to the
 /// promoted value, its original function operand is destroyed, and the
 /// instruction is erased. Unless folding is disabled, a promoted value that is
 /// itself a `differentiable_function` instruction then has its
 /// `differentiable_function_extract` users folded.
 ///
 /// Returns true if promotion produced no value (an error occurred), and false
 /// otherwise.
 bool ADContext::processDifferentiableFunctionInst(
     DifferentiableFunctionInst *dfi) {
   LLVM_DEBUG({
     auto &os = getADDebugStream() << "Processing DifferentiableFunctionInst:\n";
     dfi->printInContext(os);
   });
   // Derivative functions are already present: nothing to promote.
   if (dfi->hasDerivativeFunctions())
     return false;

   // Read everything needed from `dfi` up front, since it is erased below.
   auto loc = dfi->getLoc();
   SILFunction *parent = dfi->getFunction();
   SILBuilder builder(dfi);

   auto promotedValue = promoteToDifferentiableFunction(dfi, builder, loc, dfi);
   // Mark `dfi` as processed so that it won't be reprocessed after deletion.
   processedDifferentiableFunctionInsts.insert(dfi);
   // Promotion failed; report the error to the caller.
   if (!promotedValue)
     return true;

   // Redirect all users to the promoted value, destroy the original operand,
   // and drop the now-unused instruction.
   dfi->replaceAllUsesWith(promotedValue);
   builder.emitDestroyValueOperation(loc, dfi->getOriginalFunction());
   dfi->eraseFromParent();

   // When folding is enabled and the promoted `@differentiable` function-typed
   // value is a `differentiable_function` instruction, fold its
   // `differentiable_function_extract` users.
   if (!SkipFoldingDifferentiableFunctionExtraction) {
     auto *promotedDFI = dyn_cast<DifferentiableFunctionInst>(promotedValue);
     if (promotedDFI)
       foldDifferentiableFunctionExtraction(promotedDFI);
   }

   transform.invalidateAnalysis(
       parent, SILAnalysis::InvalidationKind::FunctionBody);
   return false;
 }

 /// AD pass entry.
 void Differentiation::run() {
   auto &module = *getModule();
   auto &astCtx = module.getASTContext();
   debugDump(module);

   // A global differentiation context.
   ADContext context(*this);

   bool errorOccurred = false;

   // Register all `@differentiable` attributes and `differentiable_function`
   // instructions in the module that trigger differentiation.
   for (SILFunction &f : module) {
     for (auto *diffAttr : f.getDifferentiableAttrs()) {
       DifferentiationInvoker invoker(diffAttr);
       assert(!context.getInvokers().count(diffAttr) &&
              "[differentiable] attribute already has an invoker");
       context.getInvokers().insert({diffAttr, invoker});
       continue;
     }
     for (SILBasicBlock &bb : f) {
       for (SILInstruction &i : bb) {
         if (auto *dfi = dyn_cast<DifferentiableFunctionInst>(&i))
           context.getDifferentiableFunctionInsts().push_back(dfi);
         else if (auto *lfi = dyn_cast<LinearFunctionInst>(&i)) {
           astCtx.Diags.diagnose(
               lfi->getLoc().getSourceLoc(),
               diag::autodiff_conversion_to_linear_function_not_supported);
           errorOccurred = true;
         }
       }
     }
   }

   // If nothing has triggered differentiation, there's nothing to do.
   if (context.getInvokers().empty() &&
       context.getDifferentiableFunctionInsts().empty())
     return;

   // AD relies on stdlib (the Swift module). If it's not imported, it's an
   // internal error.
   if (!astCtx.getStdlibModule()) {
     astCtx.Diags.diagnose(SourceLoc(),
                           diag::autodiff_internal_swift_not_imported);
     return;
   }

   // Process all `[differentiable]` attributes.
   for (auto invokerPair : context.getInvokers()) {
     auto *attr = invokerPair.first;
     auto *original = attr->getOriginal();
     auto invoker = invokerPair.second;
     errorOccurred |=
         context.processDifferentiableAttribute(original, attr, invoker);
   }

   // Iteratively process `differentiable_function` instruction worklist.
   while (!context.getDifferentiableFunctionInsts().empty()) {
     auto *dfi = context.getDifferentiableFunctionInsts().back();
     context.getDifferentiableFunctionInsts().pop_back();
     // Skip instructions that have been already been processed.
     if (context.getProcessedDifferentiableFunctionInsts().count(dfi)) continue;
     errorOccurred |= context.processDifferentiableFunctionInst(dfi);
   }

   // If any error occurred while processing `[differentiable]` attributes or
   // `differentiable_function` instructions, clean up.
   if (errorOccurred) {
     context.cleanUp();
     return;
   }

   LLVM_DEBUG(getADDebugStream() << "All differentiation finished\n");
 }

 //===----------------------------------------------------------------------===//
 // Pass creation
 //===----------------------------------------------------------------------===//

 /// Create a new, heap-allocated instance of the differentiation pass and
 /// return it as a `SILTransform`.
 // NOTE(review): the caller presumably takes ownership of the returned pass;
 // confirm against the pass manager.
 SILTransform *swift::createDifferentiation() {
   SILTransform *pass = new Differentiation;
   return pass;
 }