blob: 575fbe7762bf8bc6538bf24b6dba2c644a29e71b [file] [log] [blame]
//===--- Differentiation.cpp - SIL Automatic Differentiation --*- C++ -*---===//
// This source file is part of the open source project
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
// See for license information
// See for the list of Swift project authors
// This file implements automatic differentiation.
// NOTE: Although the AD feature is developed as part of the Swift for
// TensorFlow project, it is completely independent from TensorFlow support.
#define DEBUG_TYPE "differentiation"
#include "Differentiation.h"
#include "swift/AST/ASTMangler.h"
#include "swift/AST/ASTPrinter.h"
#include "swift/AST/AnyFunctionRef.h"
#include "swift/AST/AutoDiff.h"
#include "swift/AST/Builtins.h"
#include "swift/AST/DeclContext.h"
#include "swift/AST/DiagnosticsSIL.h"
#include "swift/AST/Expr.h"
#include "swift/AST/GenericEnvironment.h"
#include "swift/AST/GenericSignatureBuilder.h"
#include "swift/AST/LazyResolver.h"
#include "swift/AST/SourceFile.h"
#include "swift/AST/ParameterList.h"
#include "swift/AST/SubstitutionMap.h"
#include "swift/AST/TypeCheckRequests.h"
#include "swift/SIL/FormalLinkage.h"
#include "swift/SIL/LoopInfo.h"
#include "swift/SIL/Projection.h"
#include "swift/SIL/SILBuilder.h"
#include "swift/SIL/TypeSubstCloner.h"
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/LoopUtils.h"
#include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/BreadthFirstIterator.h"
#include "llvm/ADT/DenseSet.h"
using namespace swift;
using llvm::DenseMap;
using llvm::SmallDenseMap;
using llvm::SmallDenseSet;
using llvm::SmallMapVector;
using llvm::SmallSet;
/// This flag is used to disable `differentiable_function_extract` instruction
/// folding for SIL testing purposes.
static llvm::cl::opt<bool> SkipFoldingDifferentiableFunctionExtraction(
/// This flag is used to enable full JVP generation.
/// It will be removed when JVP/differential generation is robust.
static llvm::cl::opt<bool> RunJVPGeneration(
// Helpers
/// Writes the "[AD] " prefix to `llvm::dbgs()` and hands back that same debug
/// stream, so short AD-pass debug messages can be appended by the caller.
static raw_ostream &getADDebugStream() {
  raw_ostream &stream = llvm::dbgs();
  stream << "[AD] ";
  return stream;
}
/// Given a dumpable value, dumps it to `llvm::dbgs()`.
template <typename T> static inline void debugDump(T &v) {
LLVM_DEBUG(llvm::dbgs() << "\n==== BEGIN DEBUG DUMP ====\n"
<< v << "\n==== END DEBUG DUMP ====\n");
/// Returns the result of a semantics-attribute query on the function
/// referenced by `v` when `v` is a `function_ref` instruction. Returns false
/// for every other kind of value.
static bool isWithoutDerivative(SILValue v) {
if (auto *fnRef = dyn_cast<FunctionRefInst>(v))
// NOTE(review): the result of `getReferencedFunctionOrNull()` is dereferenced
// without a null check -- confirm that a referenced function always exists
// for the values passed here.
return fnRef->getReferencedFunctionOrNull()->hasSemanticsAttr(
return false;
/// Returns true if the given `apply` instruction carries the
/// "array.uninitialized_intrinsic" semantics.
static bool isArrayLiteralIntrinsic(ApplyInst *ai) {
  const bool hasArraySemantics =
      ai->hasSemantics("array.uninitialized_intrinsic");
  return hasArraySemantics;
}
/// Returns `v` as an `ApplyInst` if it is an `apply` instruction for which
/// `isArrayLiteralIntrinsic` holds. Returns nullptr otherwise.
static ApplyInst *getAllocateUninitializedArrayIntrinsic(SILValue v) {
  auto *candidate = dyn_cast<ApplyInst>(v);
  if (!candidate || !isArrayLiteralIntrinsic(candidate))
    return nullptr;
  return candidate;
}
/// Given a value, find its single `destructure_tuple` user if the value is
/// tuple-typed and such a user exists.
///
/// \returns the `destructure_tuple` user, or nullptr when `value` is not
/// tuple-typed or has no such user. Asserts that at most one such user exists.
static DestructureTupleInst *getSingleDestructureTupleUser(SILValue value) {
  if (!value->getType().is<TupleType>())
    return nullptr;
  DestructureTupleInst *result = nullptr;
  for (auto *use : value->getUses()) {
    auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
    if (!dti)
      continue;
    // `result` doubles as the "already found" marker, so no separate flag is
    // needed (a flag read only by the assertion is dead in NDEBUG builds).
    assert(!result &&
           "There should only be one `destructure_tuple` user of a tuple");
    result = dti;
  }
  return result;
}
/// Given an `apply` instruction, apply the given callback to each of its
/// direct results. If the `apply` instruction has a single `destructure_tuple`
/// user, apply the callback to the results of the `destructure_tuple` user.
///
/// \param ai the `apply` instruction whose direct results are visited.
/// \param resultCallback the callback receiving each direct result value.
static void forEachApplyDirectResult(
ApplyInst *ai, llvm::function_ref<void(SILValue)> resultCallback) {
// The non-tuple result type is distinguished from the tuple case first.
if (!ai->getType().is<TupleType>()) {
// Tuple case: iterate over the results of the single `destructure_tuple`
// user, when one exists.
if (auto *dti = getSingleDestructureTupleUser(ai))
for (auto result : dti->getResults())
/// Given a function, gather all of its formal results (both direct and
/// indirect) in an order defined by its result type. Note that "formal results"
/// refer to result values in the body of the function, not at call sites.
///
/// \param function the function whose formal results are gathered.
/// \param results output vector; one value is appended for each result of the
///        function's conventions.
static void
collectAllFormalResultsInTypeOrder(SILFunction &function,
SmallVectorImpl<SILValue> &results) {
SILFunctionConventions convs(function.getLoweredFunctionType(),
auto indResults = function.getIndirectResults();
// Direct results are derived from the operand of the `return` instruction.
auto *retInst = cast<ReturnInst>(function.findReturnBB()->getTerminator());
auto retVal = retInst->getOperand();
SmallVector<SILValue, 8> dirResults;
if (auto *tupleInst =
// Interleave direct and indirect results following the order of the result
// infos; each list is consumed through its own running index.
unsigned indResIdx = 0, dirResIdx = 0;
for (auto &resInfo : convs.getResults())
results.push_back(resInfo.isFormalDirect() ? dirResults[dirResIdx++]
: indResults[indResIdx++]);
/// Given a function, gather all of its direct results in an order defined by
/// its result type. The results are taken from the operand of the function's
/// `return` instruction, i.e. they are values in the body of the function,
/// not at call sites.
///
/// \param function the function whose direct results are gathered.
/// \param results output vector receiving the direct result values.
static void
collectAllDirectResultsInTypeOrder(SILFunction &function,
SmallVectorImpl<SILValue> &results) {
SILFunctionConventions convs(function.getLoweredFunctionType(),
auto *retInst = cast<ReturnInst>(function.findReturnBB()->getTerminator());
auto retVal = retInst->getOperand();
// A returned `tuple` instruction is treated separately from other values.
if (auto *tupleInst = dyn_cast<TupleInst>(retVal))
/// Given a function call site, gather all of its actual results (both direct
/// and indirect) in an order defined by its result type.
///
/// \param ai the `apply` instruction (call site).
/// \param extractedDirectResults the direct result values of the call, indexed
///        in the order in which direct results appear in the callee's results.
/// \param results output vector receiving the results.
static void collectAllActualResultsInTypeOrder(
ApplyInst *ai, ArrayRef<SILValue> extractedDirectResults,
SmallVectorImpl<SILValue> &results) {
auto calleeConvs = ai->getSubstCalleeConv();
// Separate running indices into the direct and the indirect result lists.
unsigned indResIdx = 0, dirResIdx = 0;
for (auto &resInfo : calleeConvs.getResults()) {
? extractedDirectResults[dirResIdx++]
: ai->getIndirectSILResults()[indResIdx++]);
/// Joins a range of types into one type: a single-element range yields that
/// element itself, any other range yields a tuple type built from all of the
/// elements.
template <typename TypeRange>
static CanType joinElementTypes(TypeRange &&range, const ASTContext &ctx) {
  if (range.size() == 1)
    return range.front();
  // Wrap every element type in a tuple element, preserving order.
  SmallVector<TupleTypeElt, 8> tupleElements;
  for (Type elementType : range)
    tupleElements.push_back(elementType);
  return TupleType::get(tupleElements, ctx);
}
/// Given a range of SIL values, retrieves the canonical types of these values,
/// and joins these types into a single type.
///
/// If the range has exactly one value, that value's AST type is returned.
/// Otherwise a canonical tuple type of all the values' AST types is returned.
///
/// \param range the SIL values whose types are joined; must provide `size()`,
///        `front()` and iteration.
/// \param ctx the AST context used to create the tuple type.
template <typename SILValueRange>
static CanType joinElementTypesFromValues(SILValueRange &&range,
                                          const ASTContext &ctx) {
  if (range.size() == 1)
    return range.front()->getType().getASTType();
  SmallVector<TupleTypeElt, 8> elts;
  // Append each element type. Writing through `elts.begin()` of the still
  // empty vector would store out of bounds and leave the vector empty.
  elts.reserve(range.size());
  for (SILValue val : range)
    elts.push_back(val->getType().getASTType());
  return TupleType::get(elts, ctx)->getCanonicalType();
}
/// Given an operator name, such as '+', and a protocol, returns the '+'
/// operator. If the operator does not exist in the protocol, returns null.
///
/// \param operatorName the name looked up directly in `protocol`.
/// \param protocol the protocol declaration to search.
static FuncDecl *findOperatorDeclInProtocol(DeclName operatorName,
ProtocolDecl *protocol) {
// Find the operator requirement in the given protocol declaration.
auto opLookup = protocol->lookupDirect(operatorName);
for (auto *decl : opLookup) {
// Candidates are filtered on being protocol requirements ...
if (!decl->isProtocolRequirement())
auto *fd = dyn_cast<FuncDecl>(decl);
// ... and on being static operator functions.
if (!fd || !fd->isStatic() || !fd->isOperator())
return fd;
// Not found.
return nullptr;
/// Returns the "constrained" derivative generic signature given:
/// - An original SIL function type.
/// - A wrt parameter index subset.
/// - A possibly uncanonical derivative generic signature (optional).
/// The constrained derivative generic signature constrains all wrt parameters
/// to conform to `Differentiable`.
///
/// When `derivativeGenSig` is null, the original function type's generic
/// signature is used in its place; when that is null as well, null is
/// returned.
static GenericSignature getConstrainedDerivativeGenericSignature(
CanSILFunctionType originalFnTy, IndexSubset *paramIndexSet,
GenericSignature derivativeGenSig) {
if (!derivativeGenSig)
derivativeGenSig = originalFnTy->getGenericSignature();
if (!derivativeGenSig)
return nullptr;
// Constrain all wrt parameters to `Differentiable`.
auto &ctx = derivativeGenSig->getASTContext();
auto *diffableProto = ctx.getProtocol(KnownProtocolKind::Differentiable);
SmallVector<Requirement, 4> requirements;
// One conformance requirement is formed per wrt parameter index, using the
// parameter's type in the original function type.
for (unsigned paramIdx : paramIndexSet->getIndices()) {
auto paramType = originalFnTy->getParameters()[paramIdx].getType();
Requirement req(RequirementKind::Conformance, paramType,
return evaluateOrDefault(
/*addedGenericParams*/ {},
/// Computes the canonical derivative generic signature for a
/// `[differentiable]` attribute on the given original function:
/// - if the attribute has its own derivative generic signature, the canonical
///   form of that signature is returned;
/// - otherwise the generic signature of the original function's lowered
///   function type is returned.
static CanGenericSignature getDerivativeGenericSignature(
    SILDifferentiableAttr *attr, SILFunction *original) {
  auto attrSignature = attr->getDerivativeGenericSignature();
  if (!attrSignature)
    return original->getLoweredFunctionType()->getGenericSignature();
  return attrSignature->getCanonicalSignature();
}
// Clone the generic parameters of the given generic signature and return a new
// `GenericParamList`.
//
// `ctx` is the AST context used to allocate the clones, `dc` is the
// declaration context passed to each cloned parameter, and `sig` supplies the
// generic parameters to clone.
static GenericParamList *cloneGenericParameters(ASTContext &ctx,
DeclContext *dc,
CanGenericSignature sig) {
SmallVector<GenericTypeParamDecl *, 2> clonedParams;
// Each clone reuses the name and depth of the parameter it is cloned from and
// carries no source location.
for (auto paramType : sig->getGenericParams()) {
auto clonedParam = new (ctx) GenericTypeParamDecl(
dc, paramType->getName(), SourceLoc(), paramType->getDepth(),
return GenericParamList::create(ctx, SourceLoc(), clonedParams, SourceLoc());
/// Given a `differentiable_function` instruction, looks up the differential
/// operator expression in the AST that the instruction's location refers to.
/// Returns nullptr when the location is not such an expression.
static DifferentiableFunctionExpr *
findDifferentialOperator(DifferentiableFunctionInst *inst) {
  auto instLoc = inst->getLoc();
  return instLoc.getAsASTNode<DifferentiableFunctionExpr>();
}
/// Looks for an instruction of type `Inst` behind the given value, stepping
/// through `thin_to_thick_function` and `convert_function` operands and
/// `partial_apply` callees. Returns nullptr when the chain ends at any other
/// kind of value.
template <class Inst>
static Inst *peerThroughFunctionConversions(SILValue value) {
  while (true) {
    // The requested instruction kind is checked first at every step.
    if (auto *found = dyn_cast<Inst>(value))
      return found;
    if (auto *thinToThick = dyn_cast<ThinToThickFunctionInst>(value))
      value = thinToThick->getOperand();
    else if (auto *convertFn = dyn_cast<ConvertFunctionInst>(value))
      value = convertFn->getOperand();
    else if (auto *partialApply = dyn_cast<PartialApplyInst>(value))
      value = partialApply->getCallee();
    else
      return nullptr;
  }
}
// Auxiliary data structures
namespace {
class ADContext;
/// The invoker of a differentiation task. It can be some user syntax, e.g.
/// a `differentiable_function` instruction lowered from a
/// `DifferentiableFunctionExpr` expression, the differentiation pass, or
/// nothing at all. This will be used to emit informative diagnostics.
struct DifferentiationInvoker {
/// The kind of the invoker of a differentiation task.
enum class Kind {
// Invoked by a `differentiable_function` instruction, which may or may not
// be linked to a Swift AST node (e.g. a `DifferentiableFunctionExpr`
// expression).
// Invoked by the indirect application of differentiation. This case has an
// associated original `apply` instruction and `[differentiable]` attribute.
// Invoked by a `[differentiable]` attribute in SIL **without** being linked
// to a Swift AST attribute. This case has an associated `[differentiable]`
// attribute.
/// The kind tag; the accessors below assert it before reading `value`.
Kind kind;
/// Payload storage, with one member per invoker kind.
union Value {
/// The instruction associated with the `DifferentiableFunctionInst` case.
DifferentiableFunctionInst *diffFuncInst;
Value(DifferentiableFunctionInst *inst) : diffFuncInst(inst) {}
/// The parent `apply` instruction and `[differentiable]` attribute
/// associated with the `IndirectDifferentiation` case.
std::pair<ApplyInst *, SILDifferentiableAttr *>
Value(ApplyInst *applyInst, SILDifferentiableAttr *attr)
: indirectDifferentiation({applyInst, attr}) {}
/// The `[differentiable]` attribute associated with the
/// `SILDifferentiableAttribute` case.
SILDifferentiableAttr *silDifferentiableAttribute;
Value(SILDifferentiableAttr *attr) : silDifferentiableAttribute(attr) {}
} value;
/// Constructs an invoker from an explicit kind and payload.
DifferentiationInvoker(Kind kind, Value value) : kind(kind), value(value) {}
/// Constructs a `DifferentiableFunctionInst` invoker.
DifferentiationInvoker(DifferentiableFunctionInst *inst)
: kind(Kind::DifferentiableFunctionInst), value(inst) {}
/// Constructs an `IndirectDifferentiation` invoker.
DifferentiationInvoker(ApplyInst *applyInst, SILDifferentiableAttr *attr)
: kind(Kind::IndirectDifferentiation),
value({applyInst, attr}) {}
/// Constructs a `SILDifferentiableAttribute` invoker.
DifferentiationInvoker(SILDifferentiableAttr *attr)
: kind(Kind::SILDifferentiableAttribute), value(attr) {}
/// Returns the kind of this invoker.
Kind getKind() const { return kind; }
/// Returns the instruction payload; asserts the kind is
/// `DifferentiableFunctionInst`.
DifferentiableFunctionInst *getDifferentiableFunctionInst() const {
assert(kind == Kind::DifferentiableFunctionInst);
return value.diffFuncInst;
/// Returns the (`apply`, attribute) payload; asserts the kind is
/// `IndirectDifferentiation`.
std::pair<ApplyInst *, SILDifferentiableAttr *>
getIndirectDifferentiation() const {
assert(kind == Kind::IndirectDifferentiation);
return value.indirectDifferentiation;
/// Returns the attribute payload; asserts the kind is
/// `SILDifferentiableAttribute`.
SILDifferentiableAttr *getSILDifferentiableAttribute() const {
assert(kind == Kind::SILDifferentiableAttribute);
return value.silDifferentiableAttribute;
/// Returns a source location for this invoker, derived from the payload that
/// corresponds to its kind.
SourceLoc getLocation() const {
switch (kind) {
case Kind::DifferentiableFunctionInst:
return getDifferentiableFunctionInst()->getLoc().getSourceLoc();
case Kind::IndirectDifferentiation:
return getIndirectDifferentiation().first->getLoc().getSourceLoc();
case Kind::SILDifferentiableAttribute:
return getSILDifferentiableAttribute()->getOriginal()
/// Prints a textual description of this invoker to `os`.
void print(llvm::raw_ostream &os) const;
class DifferentiableActivityInfo;
/// Information about the JVP/VJP function produced during JVP/VJP generation,
/// e.g. mappings from original values to corresponding values in the
/// pullback/differential struct.
/// A linear map struct is an aggregate value containing linear maps checkpointed
/// during the JVP/VJP computation. Linear map structs are generated for every
/// original function during JVP/VJP generation. Linear map struct values are
/// constructed by JVP/VJP functions and consumed by pullback/differential
/// functions.
class LinearMapInfo {
/// The linear map kind.
AutoDiffLinearMapKind kind;
/// The original function.
SILFunction *const original;
/// The derivative function.
SILFunction *const derivative;
/// Activity info of the original function.
const DifferentiableActivityInfo &activityInfo;
/// Differentiation indices of the function.
const SILAutoDiffIndices &indices;
/// Mapping from original basic blocks to linear map structs.
DenseMap<SILBasicBlock *, StructDecl *> linearMapStructs;
/// Mapping from original basic blocks to branching trace enums.
/// For pullbacks: these are predecessor enums.
/// For differentials: these are successor enums.
DenseMap<SILBasicBlock *, EnumDecl *> branchingTraceDecls;
/// Mapping from `apply` and `struct_extract` instructions in the original
/// function to the corresponding linear map declaration in the linear map
/// struct.
DenseMap<SILInstruction *, VarDecl *> linearMapValueMap;
/// Mapping from predecessor+successor basic block pairs in original function
/// to the corresponding branching trace enum case.
DenseMap<std::pair<SILBasicBlock *, SILBasicBlock *>, EnumElementDecl *>
/// Mapping from linear map structs to their branching trace enum fields.
DenseMap<StructDecl *, VarDecl *> linearMapStructEnumFields;
/// A type converter, used to compute struct/enum SIL types.
Lowering::TypeConverter &typeConverter;
/// Maps the given type into the derivative function's context. A type that
/// contains archetypes is first mapped out of its context; any other type is
/// mapped into the derivative's context as-is.
SILType remapTypeInDerivative(SILType ty) {
  SILType typeToMap = ty.hasArchetype() ? ty.mapTypeOutOfContext() : ty;
  return derivative->mapTypeIntoContext(typeToMap);
}
/// Adds a `VarDecl` member with the given name and type to the given nominal
/// declaration.
///
/// \param nominal the nominal type declaration that receives the member; its
///        AST context is used for allocation.
/// \param name the member name.
/// \param type the member type.
/// \returns the newly created variable declaration.
VarDecl *addVarDecl(NominalTypeDecl *nominal, StringRef name, Type type) {
auto &astCtx = nominal->getASTContext();
auto id = astCtx.getIdentifier(name);
// The declaration is a non-static `var` without a source location, created
// with `nominal` as its declaration context.
auto *varDecl = new (astCtx) VarDecl(
/*IsStatic*/ false, VarDecl::Introducer::Var, /*IsCaptureList*/ false,
SourceLoc(), id, nominal);
// Types containing archetypes are distinguished from other types here.
if (type->hasArchetype())
return varDecl;
/// Returns the source file used to hold implicit declarations in the current
/// Swift module. The parent source file of `original`'s declaration context
/// is preferred; failing that, the first `SourceFile` among the module's
/// files is used. Hits `llvm_unreachable` if the module has no `SourceFile`.
// FIXME: Currently it defaults to the file containing `original`, if it can
// be determined. Otherwise, it defaults to any file unit in the module. To
// handle this more properly, we could revive the DerivedFileUnit class to
// contain all synthesized implicit type declarations.
SourceFile &getDeclarationFileUnit() {
  if (original->hasLocation()) {
    auto *declContext = original->getLocation().getAsDeclContext();
    auto *parentSourceFile =
        declContext ? declContext->getParentSourceFile() : nullptr;
    if (parentSourceFile)
      return *parentSourceFile;
  }
  // Fall back to the first source file in the module.
  for (auto *file : original->getModule().getSwiftModule()->getFiles()) {
    if (auto *sourceFile = dyn_cast<SourceFile>(file))
      return *sourceFile;
  }
  llvm_unreachable("No files?");
}
/// Compute and set the access level for the given nominal type, given the
/// original function linkage.
///
/// \param nominal the nominal type declaration to configure.
/// \param originalLinkage the SIL linkage of the original function; it
///        selects the branch taken below.
void computeAccessLevel(
NominalTypeDecl *nominal, SILLinkage originalLinkage) {
auto &astCtx = nominal->getASTContext();
switch (originalLinkage) {
// Public linkage kinds involve an implicit `UsableFromInlineAttr`.
case swift::SILLinkage::Public:
case swift::SILLinkage::PublicNonABI:
new (astCtx) UsableFromInlineAttr(/*Implicit*/ true));
// Hidden, shared and private linkage are handled together.
case swift::SILLinkage::Hidden:
case swift::SILLinkage::Shared:
case swift::SILLinkage::Private:
// When the original function has external linkage, we create an internal
// struct for use by our own module. This is necessary for cross-cell
// differentiation in Jupyter.
// TODO: Add a test in the compiler that exercises a similar situation as
// cross-cell differentiation in Jupyter.
/// Creates an enum declaration with the given JVP/VJP generic signature,
/// whose cases represent the predecessors/successors of the given original
/// block.
///
/// \param originalBB a basic block of `original` (asserted).
/// \param indices the differentiation indices; their mangling is part of the
///        enum name.
/// \param genericSig the generic signature whose parameters are cloned for
///        the enum, if non-null.
/// \param loopInfo loop information used to decide whether the enum is
///        marked indirect.
/// \returns the created enum declaration.
EnumDecl *createBranchingTraceDecl(SILBasicBlock *originalBB,
SILAutoDiffIndices indices,
CanGenericSignature genericSig,
SILLoopInfo *loopInfo) {
assert(originalBB->getParent() == original);
auto &astCtx = original->getASTContext();
auto *moduleDecl = original->getModule().getSwiftModule();
auto &file = getDeclarationFileUnit();
// Create a branching trace enum.
// The name combines the original function name, the block's debug ID, a
// kind-specific infix ("__Succ__" or "__Pred__") and the mangled indices.
std::string enumName;
switch (kind) {
case AutoDiffLinearMapKind::Differential:
enumName =
"_AD__" + original->getName().str() +
"_bb" + std::to_string(originalBB->getDebugID()) +
"__Succ__" + indices.mangle();
case AutoDiffLinearMapKind::Pullback:
enumName =
"_AD__" + original->getName().str() +
"_bb" + std::to_string(originalBB->getDebugID()) +
"__Pred__" + indices.mangle();
auto enumId = astCtx.getIdentifier(enumName);
auto loc = original->getLocation().getSourceLoc();
GenericParamList *genericParams = nullptr;
if (genericSig)
genericParams = cloneGenericParameters(astCtx, &file, genericSig);
auto *branchingTraceDecl = new (astCtx) EnumDecl(
/*EnumLoc*/ SourceLoc(), /*Name*/ enumId, /*NameLoc*/ loc,
/*Inherited*/ {}, /*GenericParams*/ genericParams, /*DC*/ &file);
// Note: must mark enum as implicit to satisfy assertion in
// `Parser::parseDeclListDelayed`.
if (genericSig)
// Add basic block enum cases.
// One case named "bb<debug ID>" is created per predecessor block.
for (auto *predBB : originalBB->getPredecessorBlocks()) {
auto bbId = "bb" + std::to_string(predBB->getDebugID());
auto *linearMapStruct = getLinearMapStruct(predBB);
auto linearMapStructTy =
// Create dummy declaration representing enum case parameter.
auto *decl = new (astCtx) ParamDecl(loc, loc, Identifier(), loc,
Identifier(), moduleDecl);
if (linearMapStructTy->hasArchetype())
// Create enum element and enum case declarations.
auto *paramList = ParameterList::create(astCtx, {decl});
auto *enumEltDecl = new (astCtx) EnumElementDecl(
/*IdentifierLoc*/ loc, DeclName(astCtx.getIdentifier(bbId)),
paramList, loc, /*RawValueExpr*/ nullptr, branchingTraceDecl);
auto *enumCaseDecl = EnumCaseDecl::create(
/*CaseLoc*/ loc, {enumEltDecl}, branchingTraceDecl);
// Record enum element declaration.
// The key is the (predecessor, original block) pair.
branchingTraceEnumCases.insert({{predBB, originalBB}, enumEltDecl});
// If original block is in a loop, mark branching trace enum as indirect.
if (loopInfo->getLoopFor(originalBB))
new (astCtx) IndirectAttr(/*Implicit*/ true));
return branchingTraceDecl;
/// Creates a struct declaration with the given JVP/VJP generic signature, for
/// storing the linear map values and predecessor/successor basic block of the
/// given original block.
///
/// \param originalBB a basic block of `original` (asserted).
/// \param indices the differentiation indices; their mangling is part of the
///        struct name.
/// \param genericSig the generic signature whose parameters are cloned for
///        the struct, if non-null.
/// \returns the created struct declaration.
StructDecl *
createLinearMapStruct(SILBasicBlock *originalBB, SILAutoDiffIndices indices,
CanGenericSignature genericSig) {
assert(originalBB->getParent() == original);
// NOTE(review): this local shadows the `original` member that the assertion
// above compares against; the two are equal whenever the assertion holds.
auto *original = originalBB->getParent();
auto &astCtx = original->getASTContext();
auto &file = getDeclarationFileUnit();
// The name combines the original function name, the block's debug ID, a
// kind-specific infix ("__DF__" or "__PB__") and the mangled indices.
std::string structName;
switch (kind) {
case swift::AutoDiffLinearMapKind::Differential:
structName =
"_AD__" + original->getName().str() +
"_bb" + std::to_string(originalBB->getDebugID()) +
"__DF__" + indices.mangle();
case swift::AutoDiffLinearMapKind::Pullback:
structName =
"_AD__" + original->getName().str() +
"_bb" + std::to_string(originalBB->getDebugID()) +
"__PB__" + indices.mangle();
auto structId = astCtx.getIdentifier(structName);
GenericParamList *genericParams = nullptr;
if (genericSig)
genericParams = cloneGenericParameters(astCtx, &file, genericSig);
auto *linearMapStruct = new (astCtx) StructDecl(
/*StructLoc*/ SourceLoc(), /*Name*/ structId, /*NameLoc*/ SourceLoc(),
/*Inherited*/ {}, /*GenericParams*/ genericParams, /*DC*/ &file);
// Note: must mark struct as implicit to satisfy assertion in
// `Parser::parseDeclListDelayed`.
if (genericSig)
linearMapStruct, original->getEffectiveSymbolLinkage());
return linearMapStruct;
/// Add a linear map to the linear map struct.
///
/// \param inst the original instruction the linear map belongs to; the struct
///        is the one associated with the instruction's parent block.
/// \param linearMapType the SIL type of the linear map; must be a
///        `SILFunctionType` (`castTo`).
/// \returns the field declaration created for the linear map, which is also
///          recorded in `linearMapValueMap` under `inst`.
VarDecl *addLinearMapDecl(SILInstruction *inst, SILType linearMapType) {
// IRGen requires decls to have AST types (not `SILFunctionType`), so we
// convert the `SILFunctionType` of the linear map to a `FunctionType` with
// the same parameters and results.
auto silFnTy = linearMapType.castTo<SILFunctionType>();
SmallVector<AnyFunctionType::Param, 8> params;
for (auto &param : silFnTy->getParameters())
AnyFunctionType *astFnTy;
// A generic function type is built when the SIL function type has a generic
// signature; a plain function type is built from the same pieces as well.
if (auto genSig = silFnTy->getGenericSignature())
astFnTy = GenericFunctionType::get(
genSig, params, silFnTy->getAllResultsType().getASTType());
astFnTy = FunctionType::get(
params, silFnTy->getAllResultsType().getASTType());
auto *origBB = inst->getParent();
auto *linMapStruct = getLinearMapStruct(origBB);
// The field name is a kind-specific prefix followed by the number of linear
// maps recorded so far.
std::string linearMapName;
switch (kind) {
case AutoDiffLinearMapKind::Differential:
linearMapName = "differential_" + llvm::itostr(linearMapValueMap.size());
case AutoDiffLinearMapKind::Pullback:
linearMapName = "pullback_" + llvm::itostr(linearMapValueMap.size());
auto *linearMapDecl = addVarDecl(linMapStruct, linearMapName, astFnTy);
linearMapValueMap.insert({inst, linearMapDecl});
return linearMapDecl;
/// Given an `apply` instruction, conditionally adds its linear map function
/// to the linear map struct if it is active.
void addLinearMapToStruct(ADContext &context, ApplyInst *ai,
const SILAutoDiffIndices &indices);
/// Generate linear map struct and branching enum declarations for the given
/// function. Linear map structs are populated with linear map fields and a
/// branching enum field.
void generateDifferentiationDataStructures(
ADContext &context, const SILAutoDiffIndices &indices,
SILFunction *derivative);
bool shouldDifferentiateApplyInst(ApplyInst *ai);
bool shouldDifferentiateInstruction(SILInstruction *inst);
LinearMapInfo(const LinearMapInfo &) = delete;
LinearMapInfo &operator=(const LinearMapInfo &) = delete;
explicit LinearMapInfo(ADContext &context,
AutoDiffLinearMapKind kind,
SILFunction *original, SILFunction *derivative,
const SILAutoDiffIndices &indices,
const DifferentiableActivityInfo &activityInfo);
/// Looks up the linear map struct recorded for the given original block.
/// Returns nullptr when no struct is recorded for that block.
StructDecl *getLinearMapStruct(SILBasicBlock *origBB) const {
  auto entry = linearMapStructs.find(origBB);
  if (entry == linearMapStructs.end())
    return nullptr;
  return entry->second;
}
/// Returns the lowered SIL type of the linear map struct associated with the
/// given original block.
///
/// The struct is looked up with `getLinearMapStruct` and its type is lowered
/// through the stored type converter.
SILType getLinearMapStructLoweredType(SILBasicBlock *origBB) const {
auto *linMapStruct = getLinearMapStruct(origBB);
auto linMapStructType =
return typeConverter.getLoweredType(linMapStructType,
/// Looks up the branching trace enum recorded for the given original block.
/// Returns nullptr when no enum is recorded for that block.
EnumDecl *getBranchingTraceDecl(SILBasicBlock *origBB) const {
  auto entry = branchingTraceDecls.find(origBB);
  if (entry == branchingTraceDecls.end())
    return nullptr;
  return entry->second;
}
/// Returns the lowered SIL type of the branching trace enum associated with
/// the given original block.
///
/// The enum is looked up with `getBranchingTraceDecl` and its type is lowered
/// through the stored type converter.
SILType getBranchingTraceEnumLoweredType(SILBasicBlock *origBB) const {
auto *traceDecl = getBranchingTraceDecl(origBB);
auto traceDeclType =
return typeConverter.getLoweredType(traceDeclType,
/// Looks up the branching trace enum case recorded for the edge from
/// `origPredBB` to `origSuccBB`. Asserts that `origPredBB` belongs to the
/// original function; returns nullptr when no case is recorded for the edge.
EnumElementDecl *
lookUpBranchingTraceEnumElement(SILBasicBlock *origPredBB,
                                SILBasicBlock *origSuccBB) const {
  assert(origPredBB->getParent() == original);
  std::pair<SILBasicBlock *, SILBasicBlock *> edge(origPredBB, origSuccBB);
  return branchingTraceEnumCases.lookup(edge);
}
/// Returns the mapping from linear map structs to their branching trace enum
/// fields.
///
/// The map is returned by non-const reference, so callers can modify it.
DenseMap<StructDecl *, VarDecl *> &getLinearMapStructEnumFields() {
return linearMapStructEnumFields;
/// Finds the branching trace enum field of the linear map struct that belongs
/// to the given original block. Returns nullptr when no field is recorded for
/// that struct.
VarDecl *lookUpLinearMapStructEnumField(SILBasicBlock *origBB) {
  return linearMapStructEnumFields.lookup(getLinearMapStruct(origBB));
}
/// Returns the linear map field declaration recorded for the given original
/// instruction (an `apply` or `struct_extract`). Asserts that a declaration
/// has been recorded for the instruction.
VarDecl *lookUpLinearMapDecl(SILInstruction *inst) {
  auto entry = linearMapValueMap.find(inst);
  assert(entry != linearMapValueMap.end() &&
         "No linear map declaration corresponding to the given instruction");
  return entry->second;
}
/// Stores `apply` instruction information calculated by VJP generation.
struct NestedApplyInfo {
/// The differentiation indices that are used to differentiate this `apply`
/// instruction.
SILAutoDiffIndices indices;
/// The original pullback type before reabstraction. `None` if the pullback
/// type is not reabstracted.
Optional<CanSILFunctionType> originalPullbackType;
/// Stream insertion operator for `DifferentiationInvoker`. Returns `os` so
/// that insertions can be chained.
static inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
DifferentiationInvoker invoker) {
return os;
/// Prints this invoker to `os` as a parenthesized
/// `(differentiation_invoker ...)` record whose contents depend on the kind.
void DifferentiationInvoker::print(llvm::raw_ostream &os) const {
os << "(differentiation_invoker ";
switch (kind) {
// Prints the `differentiable_function` instruction itself.
case Kind::DifferentiableFunctionInst:
os << "differentiable_function_inst=(" << *getDifferentiableFunctionInst()
<< ")";
// Prints the parent `apply` instruction.
case Kind::IndirectDifferentiation: {
auto indDiff = getIndirectDifferentiation();
os << "indirect_differentiation=(" << *std::get<0>(indDiff) << ')';
// TODO: Enable printing parent invokers.
// May require storing a `DifferentiableInvoker *` in the
// `IndirectDifferentiation` case.
// NOTE(review): the statements below refer to `invokers`, which is not
// declared in this struct -- presumably this is disabled sketch code for the
// TODO above; confirm.
SILInstruction *inst;
SILDifferentiableAttr *attr;
std::tie(inst, attr) = getIndirectDifferentiation();
auto invokerLookup = invokers.find(attr); // No access to ADContext?
assert(invokerLookup != invokers.end() && "Expected parent invoker");
// Prints the attribute together with the name of its original function.
case Kind::SILDifferentiableAttribute: {
auto diffAttr = getSILDifferentiableAttribute();
os << "sil_differentiable_attribute=(attr=(";
os << ") function=" << diffAttr->getOriginal()->getName();
os << ')';
// ADContext - Per-module contextual information for the Differentiation pass.
class ADContext {
/// Reference to the main transform.
SILModuleTransform &transform;
/// The module where Differentiation is performed on.
SILModule &module;
/// AST context.
ASTContext &astCtx = module.getASTContext();
/// Shared pass manager.
SILPassManager &passManager;
/// The worklist (stack) of `differentiable_function` instructions to be
/// processed.
SmallVector<DifferentiableFunctionInst *, 32> differentiableFunctionInsts;
/// The set of `differentiable_function` instructions that have been
/// processed. Used to avoid reprocessing invalidated instructions.
SmallPtrSet<DifferentiableFunctionInst *, 32>
/// Mapping from `[differentiable]` attributes to invokers.
/// `SmallMapVector` is used for deterministic insertion order iteration.
SmallMapVector<SILDifferentiableAttr *, DifferentiationInvoker, 32>
/// Mapping from `differentiable_function` instructions to result indices.
DenseMap<DifferentiableFunctionInst *, unsigned> resultIndices;
/// Mapping from original `apply` instructions to their corresponding
/// `NestedApplyInfo`s.
DenseMap<ApplyInst *, NestedApplyInfo> nestedApplyInfo;
/// List of generated functions (JVPs, VJPs, pullbacks, and thunks).
/// Saved for deletion during cleanup.
SmallVector<SILFunction *, 32> generatedFunctions;
/// List of references to generated functions.
/// Saved for deletion during cleanup.
SmallVector<SILValue, 32> generatedFunctionReferences;
/// The AdditiveArithmetic protocol in the standard library.
ProtocolDecl *additiveArithmeticProtocol =
/// `AdditiveArithmetic.+` declaration.
mutable FuncDecl *cachedPlusFn = nullptr;
/// `AdditiveArithmetic.+=` declaration.
mutable FuncDecl *cachedPlusEqualFn = nullptr;
/// Construct an ADContext for the given module.
explicit ADContext(SILModuleTransform &transform);
// General utilities
/// Returns the main transform.
SILModuleTransform &getTransform() const { return transform; }
/// Returns the module being differentiated.
SILModule &getModule() const { return module; }
/// Returns the AST context of the module.
ASTContext &getASTContext() const { return module.getASTContext(); }
/// Returns the shared pass manager.
SILPassManager &getPassManager() const { return passManager; }
/// Returns the module's type converter.
Lowering::TypeConverter &getTypeConverter() { return module.Types; }
/// Returns the worklist of `differentiable_function` instructions, by mutable
/// reference.
SmallVectorImpl<DifferentiableFunctionInst *> &
getDifferentiableFunctionInsts() {
return differentiableFunctionInsts;
/// Returns the set of already processed `differentiable_function`
/// instructions, by mutable reference.
SmallPtrSetImpl<DifferentiableFunctionInst *> &
getProcessedDifferentiableFunctionInsts() {
return processedDifferentiableFunctionInsts;
/// Returns the mapping from `[differentiable]` attributes to invokers, by
/// mutable reference.
llvm::SmallMapVector<SILDifferentiableAttr *, DifferentiationInvoker, 32> &
getInvokers() {
return invokers;
/// Returns the mapping from `differentiable_function` instructions to result
/// indices, by mutable reference.
DenseMap<DifferentiableFunctionInst *, unsigned> &getResultIndices() {
return resultIndices;
/// Returns the mapping from original `apply` instructions to their
/// `NestedApplyInfo`, by mutable reference.
DenseMap<ApplyInst *, NestedApplyInfo> &getNestedApplyInfo() {
return nestedApplyInfo;
/// Returns the list of generated functions, by mutable reference.
SmallVector<SILFunction *, 32> &getGeneratedFunctions() {
return generatedFunctions;
/// Returns the list of references to generated functions, by mutable
/// reference.
SmallVector<SILValue, 32> &getGeneratedFunctionReferences() {
return generatedFunctionReferences;
/// Returns the stored `AdditiveArithmetic` protocol declaration.
ProtocolDecl *getAdditiveArithmeticProtocol() const {
return additiveArithmeticProtocol;
/// Returns the `AdditiveArithmetic.+` declaration. The declaration is looked
/// up in the protocol on first use and cached afterwards; asserts that the
/// lookup succeeds.
FuncDecl *getPlusDecl() const {
  if (cachedPlusFn)
    return cachedPlusFn;
  auto plusName = astCtx.getIdentifier("+");
  cachedPlusFn =
      findOperatorDeclInProtocol(plusName, additiveArithmeticProtocol);
  assert(cachedPlusFn && "AdditiveArithmetic.+ not found");
  return cachedPlusFn;
}
/// Returns the `AdditiveArithmetic.+=` declaration. The declaration is looked
/// up in the protocol on first use and cached afterwards; asserts that the
/// lookup succeeds.
FuncDecl *getPlusEqualDecl() const {
  if (cachedPlusEqualFn)
    return cachedPlusEqualFn;
  auto plusEqualName = astCtx.getIdentifier("+=");
  cachedPlusEqualFn =
      findOperatorDeclInProtocol(plusEqualName, additiveArithmeticProtocol);
  assert(cachedPlusEqualFn && "AdditiveArithmetic.+= not found");
  return cachedPlusEqualFn;
}
/// Cleanup routine. Walks, in this order: the registered invokers (one per
/// `[differentiable]` attribute), the recorded references to generated
/// functions, and the generated functions themselves.
void cleanUp() {
// Each invoker entry is keyed by a `[differentiable]` attribute; the
// attribute's original function is named in the debug message.
for (auto invokerPair : invokers) {
auto *attr = std::get<0>(invokerPair);
auto *original = attr->getOriginal();
<< "Removing [differentiable] attribute for "
<< original->getName() << '\n');
// Delete all references to generated functions.
// The underlying `function_ref` is located by looking through function
// conversion instructions.
for (auto fnRef : generatedFunctionReferences) {
if (auto *fnRefInst =
peerThroughFunctionConversions<FunctionRefInst>(fnRef)) {
// Delete all generated functions.
for (auto *generatedFunction : generatedFunctions) {
<< "Deleting generated function "
<< generatedFunction->getName() << '\n');
// `[differentiable]` attribute lookup and registration
/// Searches the `[differentiable]` attributes of the given original function
/// for the first one whose indices equal `indices` exactly. Returns nullptr
/// when there is no such attribute.
SILDifferentiableAttr *lookUpDifferentiableAttr(
    SILFunction *original, const SILAutoDiffIndices &indices) const {
  SILDifferentiableAttr *match = nullptr;
  for (auto *candidate : original->getDifferentiableAttrs()) {
    if (candidate->getIndices() == indices) {
      match = candidate;
      break;
    }
  }
  return match;
}
/// Finds the `[differentiable]` attribute on the specified original function
/// whose parameter indices are a minimal superset of the specified parameter
/// indices. Returns nullptr if no such attribute exists.
///
/// An attribute whose indices equal `indices` exactly is returned immediately.
/// Otherwise the attributes are scanned and the superset with the fewest
/// parameter indices is kept as the candidate.
/// NOTE(review): this copy of the body is incomplete. The statement guarded by
/// the `source` comparison (presumably a `continue;`), possibly an argument of
/// `IndexSubset::getDefault`, and all closing braces are absent. Confirm
/// against the upstream source.
SILDifferentiableAttr *lookUpMinimalDifferentiableAttr(
SILFunction *original, const SILAutoDiffIndices &indices) const {
// Initial candidate index set; `isEmpty()` on it is used below to detect that
// no candidate has been chosen yet.
auto *minimalIndexSet = IndexSubset::getDefault(
original->getLoweredFunctionType()->getNumParameters(), false);
auto *indexSet = indices.parameters;
// Fast path: an exact match is trivially minimal.
if (auto *exactAttr = lookUpDifferentiableAttr(original, indices))
return exactAttr;
SILDifferentiableAttr *minimalAttr = nullptr;
for (auto *da : original->getDifferentiableAttrs()) {
// Attributes differentiating a different result index are compared here.
if (da->getIndices().source != indices.source)
auto *daIndexSet = da->getIndices().parameters;
// If all indices in `indexSet` are in `daIndexSet`, and it has fewer
// indices than our current candidate and a primitive VJP, then `da` is
// our new candidate.
// NOTE(TF-642): `da` may come from an un-partial-applied function and
// have larger capacity than the desired indices. We expect this logic to
// go away when `partial_apply` supports `@differentiable` callees.
if (daIndexSet->isSupersetOf(indexSet->extendingCapacity(
getASTContext(), daIndexSet->getCapacity())) &&
// fewer parameters than before
(minimalIndexSet->isEmpty() ||
daIndexSet->getNumIndices() < minimalIndexSet->getNumIndices())) {
minimalAttr = da;
minimalIndexSet = daIndexSet;
return minimalAttr;
/// Finds the `@differentiable` attribute (and its parameter indices) on the
/// specified original function whose parameter indices are a minimal
/// superset of the specified parameter indices. Returns nullptr if no such
/// attribute exists.
///
/// The result is a pair of the chosen attribute and its lowered parameter
/// index subset. When no attribute qualifies, the pair holds a null attribute
/// and the initial default index subset.
/// NOTE(review): the line carrying this function's name and all closing braces
/// are absent from this copy. Confirm against the upstream source.
std::pair<const DifferentiableAttr *, IndexSubset *>
SILDeclRef originalDeclRef, CanSILFunctionType originalFnType,
const SILAutoDiffIndices &indices) {
auto *original = originalDeclRef.getDecl();
const DifferentiableAttr *minimalAttr = nullptr;
// Initial candidate index set; `isEmpty()` on it is used below to detect that
// no candidate has been chosen yet.
auto *minimalIndexSet = IndexSubset::getDefault(
getASTContext(), originalFnType->getNumParameters(), false);
auto *indexSet = indices.parameters;
for (auto *da : original->getAttrs().getAttributes<DifferentiableAttr>()) {
// Lower the AST-level parameter indices against the declaration's interface
// function type so they can be compared with the SIL-level `indexSet`.
auto *daParamIndices = da->getParameterIndices();
auto *daIndexSet = autodiff::getLoweredParameterIndices(
daParamIndices, original->getInterfaceType()->castTo<AnyFunctionType>());
// If all indices in `indexSet` are in `daIndexSet`, and it has fewer
// indices than our current candidate and a primitive VJP, then `da` is
// our new candidate.
// NOTE(TF-642): `da` may come from an un-partial-applied function and
// have larger capacity than the desired indices. We expect this logic to
// go away when `partial_apply` supports `@differentiable` callees.
if (daIndexSet->isSupersetOf(indexSet->extendingCapacity(getASTContext(),
daIndexSet->getCapacity())) &&
// fewer parameters than before
(minimalIndexSet->isEmpty() ||
daIndexSet->getNumIndices() < minimalIndexSet->getNumIndices())) {
minimalAttr = da;
minimalIndexSet = daIndexSet;
return std::make_pair(minimalAttr, minimalIndexSet);
/// Creates a `[differentiable]` attribute on the specified original function
/// with the specified parameter indices.
///
/// Precondition (asserted): no attribute with exactly these indices exists on
/// `original` yet. The attribute is created with empty JVP and VJP names.
/// NOTE(review): the trailing arguments of the two calls below, whatever
/// statement attaches the attribute to `original`, and the closing brace are
/// absent from this copy. Confirm against the upstream source.
SILDifferentiableAttr *createDifferentiableAttr(
SILFunction *original, const SILAutoDiffIndices &indices,
GenericSignature derivativeGenericSignature) const {
assert(!lookUpDifferentiableAttr(original, indices));
// Derive a constrained generic signature from the original function type and
// the differentiation parameters.
auto derivativeConstrainedGenSig = getConstrainedDerivativeGenericSignature(
original->getLoweredFunctionType(), indices.parameters,
auto *attr = SILDifferentiableAttr::create(getModule(), indices,
/*jvpName*/ StringRef(),
/*vjpName*/ StringRef(),
return attr;
/// Finds or creates a `[differentiable]` attribute on the specified
/// original function corresponding to the specified parameter indices.
///
/// An existing attribute with exactly these indices is returned unchanged;
/// `derivativeGenericSignature` is only used when a new attribute is created.
SILDifferentiableAttr *getOrCreateDifferentiableAttr(
    SILFunction *original, const SILAutoDiffIndices &indices,
    GenericSignature derivativeGenericSignature) {
  if (auto *attr = lookUpDifferentiableAttr(original, indices))
    return attr;
  return createDifferentiableAttr(original, indices,
                                  derivativeGenericSignature);
}
/// Creates a `differentiable_function` instruction using the given builder
/// and arguments. Erases the newly created instruction from the processed set,
/// if it exists - it may exist in the processed set if it has the same
/// pointer value as a previously processed and deleted instruction.
DifferentiableFunctionInst *createDifferentiableFunction(
    SILBuilder &builder, SILLocation loc,
    IndexSubset *parameterIndices, SILValue original,
    Optional<std::pair<SILValue, SILValue>> derivativeFunctions = None) {
  auto *dfi = builder.createDifferentiableFunction(
      loc, parameterIndices, original, derivativeFunctions);
  // A recycled pointer must not be mistaken for an already-processed
  // instruction, so drop any stale entry for it.
  processedDifferentiableFunctionInsts.erase(dfi);
  return dfi;
}
/// Promotes the given `differentiable_function` instruction to a valid
/// `@differentiable` function-typed value.
/// Takes the instruction to promote, the builder and location to use for any
/// emitted instructions, and the invoker that requested differentiation.
/// Declared here; defined out of line.
SILValue promoteToDifferentiableFunction(
DifferentiableFunctionInst *inst, SILBuilder &builder, SILLocation loc,
DifferentiationInvoker invoker);
/// Process the given `[differentiable]` attribute, filling in JVP/VJPs if
/// missing.
/// Takes the original function carrying the attribute, the attribute itself,
/// and the invoker that requested differentiation. Declared here; defined out
/// of line.
/// NOTE(review): the meaning of the `bool` result is not visible in this
/// chunk. Confirm at the definition.
bool processDifferentiableAttribute(
SILFunction *original, SILDifferentiableAttr *attr,
DifferentiationInvoker invoker);
/// Process the given `differentiable_function` instruction, filling in
/// missing derivative functions if necessary.
/// Declared here; defined out of line.
/// NOTE(review): the meaning of the `bool` result is not visible in this
/// chunk. Confirm at the definition.
bool processDifferentiableFunctionInst(DifferentiableFunctionInst *dfi);
/// Fold `differentiable_function_extract` users of the given
/// `differentiable_function` instruction, directly replacing them with
/// `differentiable_function` instruction operands. If the
/// `differentiable_function` instruction has no remaining uses, delete the
/// instruction itself after folding.
///
/// Folding can be disabled by the
/// `SkipFoldingDifferentiableFunctionExtraction` flag for SIL testing
/// purposes.
/// Declared here; defined out of line.
void foldDifferentiableFunctionExtraction(DifferentiableFunctionInst *source);
/// Get or create a derivative function parameter index subset thunk from
/// `actualIndices` to `desiredIndices` for the given associated function
/// value and original function operand. Returns a pair of the parameter
/// index subset thunk and its interface substitution map (used to partially
/// apply the thunk).
/// Calls `getOrCreateSubsetParametersThunkForLinearMap` to thunk the linear
/// map returned by the derivative function.
/// NOTE(review): the line carrying this function's name is absent from this
/// copy (the return type is followed directly by the parameter list). Confirm
/// against the upstream source.
std::pair<SILFunction *, SubstitutionMap>
SILValue origFnOperand, SILValue derivativeFn,
AutoDiffDerivativeFunctionKind kind, SILAutoDiffIndices desiredIndices,
SILAutoDiffIndices actualIndices);
/// Get or create a derivative function parameter index subset thunk from
/// `actualIndices` to `desiredIndices` for the given associated function
/// value and original function operand. Returns a pair of the parameter
/// index subset thunk and its interface substitution map (used to partially
/// apply the thunk).
/// NOTE(review): the line carrying this function's name is absent from this
/// copy. Judging by the `linearMapType`/`targetType` parameters this is
/// presumably the linear-map variant of the thunk getter, in which case the
/// description above (which speaks of a derivative function thunk) looks
/// copy-pasted. Confirm against the upstream source.
std::pair<SILFunction *, SubstitutionMap>
SILFunction *assocFn, CanSILFunctionType linearMapType,
CanSILFunctionType targetType, AutoDiffDerivativeFunctionKind kind,
SILAutoDiffIndices desiredIndices, SILAutoDiffIndices actualIndices);
/// Declare an external reference to a derivative function of `original`,
/// given a `[differentiable]` attribute of `original` and the associated
/// function kind.
/// `name` is the name given to the declared function. Declared here; defined
/// out of line.
SILFunction *
declareExternalDerivativeFunction(SILFunction *original,
SILDifferentiableAttr *attr, StringRef name,
AutoDiffDerivativeFunctionKind kind);
/// Emits the diagnostic `diag` at `loc` through the AST context's diagnostic
/// engine, perfectly forwarding `args` as the diagnostic arguments. Returns
/// the in-flight diagnostic so callers can attach further information.
template <typename ...T, typename ...U>
InFlightDiagnostic diagnose(SourceLoc loc, Diag<T...> diag,
                            U &&...args) const {
  return getASTContext().Diags.diagnose(loc, diag, std::forward<U>(args)...);
}
/// Given an instruction and a differentiation task associated with the
/// parent function, emits a "not differentiable" error based on the task. If
/// the task is indirect, emits notes all the way up to the outermost task,
/// and emits an error at the outer task. Otherwise, emits an error directly.
/// `diag` and `args` describe the diagnostic to emit for the offending
/// operation. Declared here; defined out of line.
template<typename ...T, typename ...U>
InFlightDiagnostic emitNondifferentiabilityError(
SILInstruction *inst, DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args);
/// Given a value and a differentiation task associated with the parent
/// function, emits a "not differentiable" error based on the task. If the
/// task is indirect, emits notes all the way up to the outermost task, and
/// emits an error at the outer task. Otherwise, emits an error directly.
/// `diag` and `args` describe the diagnostic to emit for the offending
/// value. Declared here; defined out of line.
template<typename ...T, typename ...U>
InFlightDiagnostic emitNondifferentiabilityError(
SILValue value, DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args);
/// Emit a "not differentiable" error based on the given differentiation task
/// and diagnostic.
/// This is the overload taking an explicit source location; `diag` and `args`
/// describe the diagnostic to emit at `loc`. Declared here; defined out of
/// line.
template<typename ...T, typename ...U>
InFlightDiagnostic emitNondifferentiabilityError(
SourceLoc loc, DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args);
} // end anonymous namespace
/// Builds the differentiation context for `transform`, binding references to
/// the module being transformed and to the pass manager that runs it.
ADContext::ADContext(SILModuleTransform &transform)
    : transform(transform),
      module(*transform.getModule()),
      passManager(*transform.getPassManager()) {
}
/// `SILValue` overload of `emitNondifferentiabilityError`. Writes the value
/// and invoker to the AD debug stream, picks a source location for the value,
/// and forwards to the `SourceLoc` overload.
/// NOTE(review): the return-type line, the trailing arguments of the final
/// call, and the closing brace are absent from this copy. Confirm against the
/// upstream source.
template<typename ...T, typename ...U>
ADContext::emitNondifferentiabilityError(SILValue value,
DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args) {
getADDebugStream() << "Diagnosing non-differentiability.\n";
getADDebugStream() << "For value:\n" << value;
getADDebugStream() << "With invoker:\n" << invoker << '\n';
auto valueLoc = value.getLoc().getSourceLoc();
// If the value does not have a valid location, use the function location
// as a fallback. Improves diagnostics in some cases.
if (valueLoc.isInvalid())
valueLoc = value->getFunction()->getLocation().getSourceLoc();
return emitNondifferentiabilityError(valueLoc, invoker, diag,
/// `SILInstruction *` overload of `emitNondifferentiabilityError`. Writes the
/// instruction and invoker to the AD debug stream, picks a source location
/// for the instruction, and forwards to the `SourceLoc` overload.
/// NOTE(review): the return-type line, the trailing arguments of the final
/// call, and the closing brace are absent from this copy. Confirm against the
/// upstream source.
template<typename ...T, typename ...U>
ADContext::emitNondifferentiabilityError(SILInstruction *inst,
DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args) {
getADDebugStream() << "Diagnosing non-differentiability.\n";
getADDebugStream() << "For instruction:\n" << *inst;
getADDebugStream() << "With invoker:\n" << invoker << '\n';
auto instLoc = inst->getLoc().getSourceLoc();
// If instruction does not have a valid location, use the function location
// as a fallback. Improves diagnostics for `ref_element_addr` generated in
// synthesized stored property getters.
if (instLoc.isInvalid())
instLoc = inst->getFunction()->getLocation().getSourceLoc();
return emitNondifferentiabilityError(instLoc, invoker, diag,
/// `SourceLoc` overload of `emitNondifferentiabilityError`: dispatches on the
/// kind of `invoker` to decide where the error and the accompanying notes are
/// emitted. Every visible path ends by returning an in-flight diagnostic
/// located at `loc`.
/// NOTE(review): the return-type line, several statements (diagnostic
/// continuations, the body of the attribute-found branch, trailing call
/// arguments) and all closing braces are absent from this copy. Confirm
/// against the upstream source.
template<typename ...T, typename ...U>
ADContext::emitNondifferentiabilityError(SourceLoc loc,
DifferentiationInvoker invoker,
Diag<T...> diag, U &&...args) {
switch (invoker.getKind()) {
// For `differentiable_function` instructions: if the `differentiable_function`
// instruction comes from a differential operator, emit an error on the
// expression and a note on the non-differentiable operation. Otherwise, emit
// both an error and note on the non-differentiable operation.
case DifferentiationInvoker::Kind::DifferentiableFunctionInst: {
auto *inst = invoker.getDifferentiableFunctionInst();
if (auto *expr = findDifferentialOperator(inst)) {
diagnose(expr->getLoc(), diag::autodiff_function_not_differentiable_error)
return diagnose(loc, diag, std::forward<U>(args)...);
diagnose(loc, diag::autodiff_expression_not_differentiable_error);
return diagnose(loc, diag, std::forward<U>(args)...);
// For `[differentiable]` attributes, try to find an AST function declaration
// and `@differentiable` attribute. If they are found, emit an error on the
// `@differentiable` attribute; otherwise, emit an error on the SIL function.
// Emit a note at the non-differentiable operation.
case DifferentiationInvoker::Kind::SILDifferentiableAttribute: {
auto *attr = invoker.getSILDifferentiableAttribute();
auto *original = attr->getOriginal();
// Tracks whether an AST `@differentiable` attribute was located.
bool foundAttr = false;
if (auto *declContext = original->getDeclContext()) {
if (auto *fnDecl = declContext->getInnermostDeclarationDeclContext()) {
if (auto *diffAttr =
fnDecl->getAttrs().getAttribute<DifferentiableAttr>()) {
foundAttr = true;
// Fallback if we cannot find the expected attribute.
if (!foundAttr)
return diagnose(loc, diag, std::forward<U>(args)...);
// For indirect differentiation, emit a "not differentiable" note on the
// expression first. Then emit an error at the source invoker of
// differentiation, and a "when differentiating this" note at each indirect
// invoker.
case DifferentiationInvoker::Kind::IndirectDifferentiation: {
SILInstruction *inst;
SILDifferentiableAttr *attr;
std::tie(inst, attr) = invoker.getIndirectDifferentiation();
// The parent invoker must have been registered for this attribute.
auto invokerLookup = invokers.find(attr);
assert(invokerLookup != invokers.end() && "Expected parent invoker");
// Recurse with the parent invoker, then add the note at `loc`.
emitNondifferentiabilityError(inst, invokerLookup->second,
return diagnose(loc, diag::autodiff_when_differentiating_function_call);
// Activity Analysis
namespace {
class DifferentiableActivityCollection;
/// In many real situations, the end-users of AD need only the derivatives of
/// some selected outputs of `P` with respect to some selected inputs of `P`.
/// Whatever the differentiation mode (tangent, reverse,...), these restrictions
/// allow the AD tool to produce a much more efficient differentiated program.
/// Essentially, fixing some inputs and neglecting some outputs allows AD to
/// just forget about several intermediate differentiated variables.
/// Activity analysis is the specific analysis that detects these situations,
/// therefore allowing for a better differentiated code. Activity analysis is
/// present in all transformation-based AD tools.
/// To begin with, the end-user specifies that only some output variables (the
/// “dependent”) must be differentiated with respect to only some input
/// variables (the “independent”). We say that variable `y` depends on `x` when
/// the derivative of `y` with respect to `x` is not trivially null. We say that
/// a variable is “varied” if it depends on at least one independent. Conversely
/// we say that a variable is “useful” if at least one dependent depends on it.
/// Finally, we say that a variable is “active” if it is at the same time varied
/// and useful. In the special case of the tangent mode, it is easy to check
/// that when variable `v` is not varied at some place in the program, then its
/// derivative `v̇` at this place is certainly null. Conversely when variable `v`
/// is not useful, then whatever the value of `v̇`, this value does not matter
/// for the final result. Symmetric reasoning applies for the reverse mode of
/// AD: observing that differentiated variables go upstream, we see that a
/// useless variable has a null derivative, in other words the partial
/// derivative of the output with respect to this variable is null. Conversely
/// when variable `v` is not varied, then whatever the value of its derivative
/// `v̄`, this value does not matter for the final result.
/// Reference:
/// Laurent Hascoët. Automatic Differentiation by Program Transformation. 2007.
class DifferentiableActivityAnalysis
    : public FunctionAnalysisBase<DifferentiableActivityCollection> {
private:
  /// Dominance analysis; set from the pass manager in `initialize`.
  DominanceAnalysis *dominanceAnalysis = nullptr;
  /// Post-dominance analysis; set from the pass manager in `initialize`.
  PostDominanceAnalysis *postDominanceAnalysis = nullptr;

public:
  explicit DifferentiableActivityAnalysis()
      : FunctionAnalysisBase(SILAnalysisKind::DifferentiableActivity) {}

  /// LLVM-style RTTI support: true if `s` is a differentiable activity
  /// analysis.
  static bool classof(const SILAnalysis *s) {
    return s->getKind() == SILAnalysisKind::DifferentiableActivity;
  }

  /// Requests invalidation whenever the invalidation kind `k` shares any bit
  /// with `InvalidationKind::Everything`.
  virtual bool shouldInvalidate(SILAnalysis::InvalidationKind k) override {
    return k & InvalidationKind::Everything;
  }

  /// Creates the per-function activity collection for `f`.
  virtual std::unique_ptr<DifferentiableActivityCollection>
  newFunctionAnalysis(SILFunction *f) override;

  /// Fetches the dominance and post-dominance analyses from `pm`.
  virtual void initialize(SILPassManager *pm) override;
};
} // end anonymous namespace
namespace {
/// Represents the differentiation activity associated with a SIL value.
enum class ActivityFlags : unsigned {
/// The value depends on a function parameter.
Varied = 1 << 1,
/// The value contributes to a result.
Useful = 1 << 2,
/// The value is both varied and useful.
Active = Varied | Useful,
using Activity = OptionSet<ActivityFlags>;
/// Result of activity analysis on a function. Accepts queries for whether a
/// value is "varied", "useful" or "active" against certain differentiation
/// indices.
/// The analysis runs when an instance is constructed (the out-of-line
/// constructor calls `analyze`).
/// NOTE(review): access specifiers, the arguments of the two conformance
/// lookup constructions, and all closing braces are absent from this copy.
/// Confirm against the upstream source.
class DifferentiableActivityInfo {
/// The collection that owns this info; provides the function and the
/// dominance information.
DifferentiableActivityCollection &parent;
/// The derivative generic signature.
GenericSignature derivativeGenericSignature;
/// Input values, i.e. parameters (both direct and indirect).
SmallVector<SILValue, 4> inputValues;
/// Output values, i.e. individual values (not the final tuple) being returned
/// by the `return` instruction.
SmallVector<SILValue, 4> outputValues;
/// The set of useful variables, indexed by the corresponding dependent value
/// (output) index.
SmallVector<SmallDenseSet<SILValue>, 4> usefulValueSets;
/// The set of varied variables, indexed by the corresponding independent
/// value (input) index.
SmallVector<SmallDenseSet<SILValue>, 4> variedValueSets;
/// The original function.
SILFunction &getFunction();
/// The conformance lookup function.
LookupConformanceFn getLookupConformanceFunction() {
// Look up in derivative generic signature, if defined.
if (derivativeGenericSignature)
return LookUpConformanceInSignature(
// Otherwise, look up in the module.
return LookUpConformanceInModule(
/// Perform analysis and populate sets.
void analyze(DominanceInfo *di, PostDominanceInfo *pdi);
/// Setters recording that `value` is varied with respect to the given
/// independent variable, or useful with respect to the given dependent
/// variable; the `AcrossArrayInitialization` variants are declared alongside
/// them and defined out of line.
void setVaried(SILValue value, unsigned independentVariableIndex);
void setVariedAcrossArrayInitialization(SILValue value,
unsigned independentVariableIndex);
void setUseful(SILValue value, unsigned dependentVariableIndex);
void setUsefulAcrossArrayInitialization(SILValue value,
unsigned dependentVariableIndex);
/// Marks the given value as "varied" and recursively propagates "varied"
/// inwards (to operands) through projections. Skips any `@noDerivative`
/// struct field projections.
void propagateVariedInwardsThroughProjections(
SILValue value, unsigned independentVariableIndex);
void propagateUsefulThroughBuffer(SILValue value,
unsigned dependentVariableIndex);
/// Creates the activity info for `parent`'s function under the given
/// derivative generic signature.
explicit DifferentiableActivityInfo(
DifferentiableActivityCollection &parent,
GenericSignature derivativeGenericSignature);
/// Queries against a single independent/dependent variable index, a set of
/// parameter indices, or full differentiation indices.
bool isVaried(SILValue value, unsigned independentVariableIndex) const;
bool isUseful(SILValue value, unsigned dependentVariableIndex) const;
bool isVaried(SILValue value, IndexSubset *parameterIndices) const;
bool isActive(SILValue value, const SILAutoDiffIndices &indices) const;
Activity getActivity(SILValue value,
const SILAutoDiffIndices &indices) const;
Activity getActivity(SILInstruction *inst,
const SILAutoDiffIndices &indices) const;
/// Given a parameter argument (not indirect result) and some differentiation
/// indices, figure out whether the parent function is being differentiated with
/// respect to this parameter, according to the indices.
///
/// Returns false for a null `argument`. Otherwise returns true iff `argument`
/// is the parameter argument at one of the positions listed in `indices`,
/// where positions index the function's arguments without indirect results.
static bool isDifferentiationParameter(SILArgument *argument,
                                       IndexSubset *indices) {
  if (!argument) return false;
  auto *function = argument->getFunction();
  auto paramArgs = function->getArgumentsWithoutIndirectResults();
  for (unsigned i : indices->getIndices())
    if (paramArgs[i] == argument)
      return true;
  return false;
}
/// For an `apply` instruction with active results, compute:
/// - The results of the `apply` instruction, in type order.
/// - The set of minimal parameter and result indices for differentiating the
///   `apply` instruction.
///
/// Outputs are written to `results`, `paramIndices` and `resultIndices`.
/// Asserts that one result was collected per callee result and that at least
/// one collected result is active for `parentIndices`.
/// NOTE(review): many statements of this body (the remainder of the parameter
/// condition, the appends to the output vectors, index increments, lambda
/// bodies) and all closing braces are absent from this copy. Confirm against
/// the upstream source.
static void collectMinimalIndicesForFunctionCall(
ApplyInst *ai, const SILAutoDiffIndices &parentIndices,
const DifferentiableActivityInfo &activityInfo,
SmallVectorImpl<SILValue> &results, SmallVectorImpl<unsigned> &paramIndices,
SmallVectorImpl<unsigned> &resultIndices) {
auto calleeFnTy = ai->getSubstCalleeType();
auto calleeConvs = ai->getSubstCalleeConv();
// Parameter indices are indices (in the callee type signature) of parameter
// arguments that are varied or are arguments.
// Record all parameter indices in type order.
unsigned currentParamIdx = 0;
for (auto applyArg : ai->getArgumentsWithoutIndirectResults()) {
if (activityInfo.isVaried(applyArg, parentIndices.parameters) ||
// Result indices are indices (in the callee type signature) of results that
// are useful.
SmallVector<SILValue, 8> directResults;
forEachApplyDirectResult(ai, [&](SILValue directResult) {
auto indirectResults = ai->getIndirectSILResults();
// Record all results and result indices in type order.
unsigned dirResIdx = 0;
unsigned indResIdx = calleeConvs.getSILArgIndexOfFirstIndirectResult();
for (auto &resAndIdx : enumerate(calleeConvs.getResults())) {
auto &res = resAndIdx.value();
unsigned idx = resAndIdx.index();
// Direct and indirect results are drawn from separate sequences, each with
// its own running index.
if (res.isFormalDirect()) {
if (auto dirRes = directResults[dirResIdx])
if (dirRes && activityInfo.isUseful(dirRes, parentIndices.source))
} else {
if (activityInfo.isUseful(indirectResults[indResIdx],
// Make sure the function call has active results.
assert(results.size() == calleeFnTy->getNumResults());
assert(llvm::any_of(results, [&](SILValue result) {
return activityInfo.isActive(result, parentIndices);
/// Constructs the linear map info for differentiating `original` into
/// `derivative` with respect to `indices`. Stores the given kind, functions,
/// activity info and indices, takes the type converter from `context`, and
/// then immediately generates the differentiation data structures.
LinearMapInfo::LinearMapInfo(ADContext &context,
                             AutoDiffLinearMapKind kind,
                             SILFunction *original, SILFunction *derivative,
                             const SILAutoDiffIndices &indices,
                             const DifferentiableActivityInfo &activityInfo)
    : kind(kind), original(original), derivative(derivative),
      activityInfo(activityInfo), indices(indices),
      typeConverter(context.getTypeConverter()) {
  generateDifferentiationDataStructures(context, indices, derivative);
}
/// Returns a flag that indicates whether the `apply` instruction should be
/// differentiated, given the differentiation indices of the instruction's
/// parent function. Whether the `apply` should be differentiated is determined
/// sequentially from the following conditions:
/// 1. The instruction has an active `inout` argument.
/// 2. The instruction is a call to the array literal initialization intrinsic
///    ("array.uninitialized_intrinsic"), where the result is active and where
///    there is a `store` of an active value into the array's buffer.
/// 3. The instruction has both an active result (direct or indirect) and an
///    active argument.
bool LinearMapInfo::shouldDifferentiateApplyInst(ApplyInst *ai) {
  // Function applications with an inout argument should be differentiated.
  auto paramInfos = ai->getSubstCalleeConv().getParameters();
  auto arguments = ai->getArgumentsWithoutIndirectResults();
  for (auto i : swift::indices(paramInfos))
    if (paramInfos[i].isIndirectInOut() &&
        activityInfo.isActive(arguments[i], indices))
      return true;

  // Gather activity of direct and indirect results separately.
  bool hasActiveDirectResults = false;
  forEachApplyDirectResult(ai, [&](SILValue directResult) {
    hasActiveDirectResults |= activityInfo.isActive(directResult, indices);
  });
  bool hasActiveIndirectResults = llvm::any_of(ai->getIndirectSILResults(),
      [&](SILValue result) { return activityInfo.isActive(result, indices); });
  bool hasActiveResults = hasActiveDirectResults || hasActiveIndirectResults;

  // TODO: Pattern match to make sure there is at least one `store` to the
  // array's active buffer.
  if (isArrayLiteralIntrinsic(ai) && hasActiveResults)
    return true;

  bool hasActiveArguments = llvm::any_of(arguments,
      [&](SILValue arg) { return activityInfo.isActive(arg, indices); });
  return hasActiveResults && hasActiveArguments;
}
/// Returns a flag indicating whether the instruction should be differentiated,
/// given the differentiation indices of the instruction's parent function.
/// Whether the instruction should be differentiated is determined sequentially
/// from any of the following conditions:
/// 1. The instruction is an `apply` and `shouldDifferentiateApplyInst` returns
///    true.
/// 2. The instruction has a source operand and a destination operand, both
///    being active.
/// 3. The instruction is an allocation instruction and has an active result.
/// 4. The instruction performs reference counting, lifetime ending, access
///    ending, or destroying on an active operand.
/// 5. The instruction creates an SSA copy of an active operand.
/// NOTE(review): the macro definition surrounding the `INST##Inst` check, its
/// instantiations, and all closing braces are absent from this copy. Confirm
/// against the upstream source.
bool LinearMapInfo::shouldDifferentiateInstruction(SILInstruction *inst) {
// An `apply` with an active argument and an active result (direct or
// indirect) should be differentiated.
if (auto *ai = dyn_cast<ApplyInst>(inst))
return shouldDifferentiateApplyInst(ai);
// Anything with an active result and an active operand should be
// differentiated.
auto hasActiveOperands = llvm::any_of(inst->getAllOperands(),
[&](Operand &op) { return activityInfo.isActive(op.get(), indices); });
auto hasActiveResults = llvm::any_of(inst->getResults(),
[&](SILValue val) { return activityInfo.isActive(val, indices); });
if (hasActiveOperands && hasActiveResults)
return true;
// A `store`-like instruction does not have an SSA result, but has two
// operands that represent the source and the destination. We treat them as
// the input and the output, respectively.
// The visible check returns the activity of the destination operand.
if (auto *castInst = dyn_cast<INST##Inst>(inst)) \
return activityInfo.isActive(castInst->getDest(), indices);
// Should differentiate any allocation instruction that has an active result.
if ((isa<AllocationInst>(inst) && hasActiveResults))
return true;
if (hasActiveOperands) {
// Should differentiate any instruction that performs reference counting,
// lifetime ending, access ending, or destroying on an active operand.
if (isa<RefCountingInst>(inst) || isa<EndAccessInst>(inst) ||
isa<EndBorrowInst>(inst) || isa<DeallocationInst>(inst) ||
isa<DestroyValueInst>(inst) || isa<DestroyAddrInst>(inst))
return true;
// Should differentiate any instruction that creates an SSA copy of an
// active operand.
if (isa<CopyValueInst>(inst))
return true;
return false;
/// Takes an `apply` instruction and adds its linear map function to the
/// linear map struct if it is active.
///
/// Visible steps: collect the call's results and active parameter/result
/// indices; check for active results and arguments; choose the differentiation
/// result index (the first active result index) and the parameter indices;
/// check differentiability of the callee type; compute the derivative function
/// type for this linear map kind; and register a linear map declaration whose
/// type comes from the last element of the derivative function's result.
/// NOTE(review): many statements of this body (call openers, early returns,
/// right-hand sides of several initializations, lambda terminators) and all
/// closing braces are absent from this copy. Confirm against the upstream
/// source.
void LinearMapInfo::addLinearMapToStruct(ADContext &context, ApplyInst *ai,
const SILAutoDiffIndices &indices) {
SmallVector<SILValue, 4> allResults;
SmallVector<unsigned, 8> activeParamIndices;
SmallVector<unsigned, 8> activeResultIndices;
ai, indices, activityInfo, allResults, activeParamIndices,
// Check if there are any active results or arguments. If not, skip
// this instruction.
auto hasActiveResults = llvm::any_of(allResults, [&](SILValue res) {
return activityInfo.isActive(res, indices);
auto hasActiveArguments = llvm::any_of(
ai->getArgumentsWithoutIndirectResults(), [&](SILValue arg) {
return activityInfo.isActive(arg, indices);
if (!hasActiveResults || !hasActiveArguments)
// Compute differentiation result index.
auto source = activeResultIndices.front();
// Compute differentiation parameters.
// - If the callee has `@differentiable` function type, use differentiation
//   parameters from the function type.
// - Otherwise, use the active parameters.
IndexSubset *parameters;
auto origFnSubstTy = ai->getSubstCalleeType();
auto remappedOrigFnSubstTy =
if (remappedOrigFnSubstTy->isDifferentiable()) {
parameters = remappedOrigFnSubstTy->getDifferentiationParameterIndices();
} else {
parameters = IndexSubset::get(
// Create autodiff indices for the `apply` instruction.
SILAutoDiffIndices applyIndices(source, parameters);
// Check for non-differentiable original function type.
auto checkNondifferentiableOriginalFunctionType =
[&](CanSILFunctionType origFnTy) {
// Check non-differentiable arguments.
for (unsigned paramIndex : range(origFnTy->getNumParameters())) {
auto remappedParamType =
if (applyIndices.isWrtParameter(paramIndex) &&
return true;
// Check non-differentiable results.
auto remappedResultType =
if (!remappedResultType.isDifferentiable(derivative->getModule()))
return true;
return false;
if (checkNondifferentiableOriginalFunctionType(remappedOrigFnSubstTy))
AutoDiffDerivativeFunctionKind derivativeFnKind(kind);
auto derivativeFnType = remappedOrigFnSubstTy->getAutoDiffDerivativeFunctionType(
parameters, source, derivativeFnKind, context.getTypeConverter(),
auto derivativeFnResultTypes =
derivativeFnResultTypes->getElement(derivativeFnResultTypes->getElements().size() - 1);
auto linearMapSILType = SILType::getPrimitiveObjectType(
->getElement(derivativeFnResultTypes->getElements().size() - 1)
addLinearMapDecl(ai, linearMapSILType);
/// Generates the declarations used to differentiate `original`:
/// - a linear map struct per original basic block,
/// - a branching trace enum per original basic block, stored as a field of the
///   corresponding struct (field name "successor" for differentials and
///   "predecessor" for pullbacks),
/// - a linear map field for each `apply` instruction that should be
///   differentiated (array literal intrinsic applications are skipped).
/// Finally prints the generated declarations to the AD debug stream.
/// NOTE(review): many statements of this body (`break`/`continue`/`return`
/// statements, right-hand sides of several initializations, debug macro
/// openers) and all closing braces are absent from this copy. Confirm against
/// the upstream source.
void LinearMapInfo::generateDifferentiationDataStructures(
ADContext &context, const SILAutoDiffIndices &indices,
SILFunction *derivativeFn) {
auto &astCtx = original->getASTContext();
auto *loopAnalysis = context.getPassManager().getAnalysis<SILLoopAnalysis>();
auto *loopInfo = loopAnalysis->get(original);
// Get the derivative function generic signature.
CanGenericSignature derivativeFnGenSig = nullptr;
if (auto *derivativeFnGenEnv = derivativeFn->getGenericEnvironment())
derivativeFnGenSig =
// Create linear map struct for each original block.
for (auto &origBB : *original) {
auto *linearMapStruct =
createLinearMapStruct(&origBB, indices, derivativeFnGenSig);
linearMapStructs.insert({&origBB, linearMapStruct});
// Create branching trace enum for each original block and add it as a field
// in the corresponding struct.
StringRef traceEnumFieldName;
switch (kind) {
case AutoDiffLinearMapKind::Differential:
traceEnumFieldName = "successor";
case AutoDiffLinearMapKind::Pullback:
traceEnumFieldName = "predecessor";
for (auto &origBB : *original) {
auto *traceEnum =
createBranchingTraceDecl(&origBB, indices, derivativeFnGenSig, loopInfo);
branchingTraceDecls.insert({&origBB, traceEnum});
if (origBB.isEntry())
// Add branching trace enum field to corresponding linear map struct.
auto *linearMapStruct = getLinearMapStruct(&origBB);
auto *traceEnumField =
linearMapStructEnumFields.insert({linearMapStruct, traceEnumField});
// Add linear map fields to the linear map structs.
for (auto &origBB : *original) {
for (auto &inst : origBB) {
if (auto *ai = dyn_cast<ApplyInst>(&inst)) {
// Check for active 'inout' arguments.
bool isInout = false;
auto paramInfos = ai->getSubstCalleeConv().getParameters();
for (unsigned i : swift::indices(paramInfos)) {
if (paramInfos[i].isIndirectInOut() &&
indices)) {
// Reject functions with active inout arguments. It's not yet
// supported.
isInout = true;
if (isInout)
// Add linear map field to struct for active `apply` instructions.
// Skip array literal intrinsic applications since array literal
// initialization is linear and handled separately.
if (!shouldDifferentiateApplyInst(ai) || isArrayLiteralIntrinsic(ai))
LLVM_DEBUG(getADDebugStream() << "Adding linear map struct field for "
<< *ai);
addLinearMapToStruct(context, ai, indices);
// Print generated linear map structs and branching trace enums.
// These declarations do not show up with `-emit-sil` because they are
// implicit. Instead, use `-Xllvm -debug-only=differentiation` to test
// declarations with FileCheck.
auto &s = getADDebugStream();
PrintOptions printOptions;
printOptions.TypeDefinitions = true;
printOptions.ExplodePatternBindingDecls = true;
printOptions.SkipImplicit = false;
s << "Generated linear map structs and branching trace enums for @"
<< original->getName() << ":\n";
for (auto &origBB : *original) {
auto *linearMapStruct = getLinearMapStruct(&origBB);
linearMapStruct->print(s, printOptions); s << '\n';
for (auto &origBB : *original) {
auto *traceEnum = getBranchingTraceDecl(&origBB);
traceEnum->print(s, printOptions); s << '\n';
/// Holds the activity analysis results computed for one function, keyed by
/// derivative generic signature.
class DifferentiableActivityCollection {
// Analysis results computed so far, keyed by generic signature pointer.
SmallDenseMap<GenericSignatureImpl *, DifferentiableActivityInfo> activityInfoMap;
// The function that the contained analyses describe.
SILFunction &function;
// Dominance and post-dominance info for `function`.
DominanceInfo *domInfo;
PostDominanceInfo *postDomInfo;
/// Returns the activity info for `assocGenSig`. On the first request for a
/// signature, a new `DifferentiableActivityInfo` is constructed and cached;
/// later requests return the cached entry.
// NOTE(review): `kind` is not read in this body -- confirm whether callers
// still need to pass it.
DifferentiableActivityInfo &getActivityInfo(
GenericSignature assocGenSig, AutoDiffDerivativeFunctionKind kind) {
auto activityInfoLookup = activityInfoMap.find(assocGenSig.getPointer());
if (activityInfoLookup != activityInfoMap.end())
return activityInfoLookup->getSecond();
auto insertion = activityInfoMap.insert(
{assocGenSig.getPointer(), DifferentiableActivityInfo(*this, assocGenSig)});
return insertion.first->getSecond();
/// Constructs a collection for `f` from its dominance and post-dominance
/// info.
explicit DifferentiableActivityCollection(SILFunction &f,
DominanceInfo *di,
PostDominanceInfo *pdi);
} // end anonymous namespace
// Creates the activity collection for `f`, handing it the dominance and
// post-dominance info of `f`. Both analyses are asserted to be non-null.
DifferentiableActivityAnalysis::newFunctionAnalysis(SILFunction *f) {
assert(dominanceAnalysis && "Expect a valid dominance anaysis");
assert(postDominanceAnalysis && "Expect a valid post-dominance anaysis");
return llvm::make_unique<DifferentiableActivityCollection>(
*f, dominanceAnalysis->get(f), postDominanceAnalysis->get(f));
/// Fetches the dominance and post-dominance analyses from the pass manager
/// and stores them for later use.
void DifferentiableActivityAnalysis::initialize(SILPassManager *pm) {
dominanceAnalysis = pm->getAnalysis<DominanceAnalysis>();
postDominanceAnalysis = pm->getAnalysis<PostDominanceAnalysis>();
/// Returns a newly allocated `DifferentiableActivityAnalysis`. The module
/// argument is not used in this body.
SILAnalysis *swift::createDifferentiableActivityAnalysis(SILModule *m) {
return new DifferentiableActivityAnalysis();
// Constructor: stores the function and its dominance/post-dominance info.
// No analysis is run here.
SILFunction &f, DominanceInfo *di, PostDominanceInfo *pdi)
: function(f), domInfo(di), postDomInfo(pdi) {}
// Constructor: records the owning collection and the derivative generic
// signature, then immediately runs the analysis using the parent's
// dominance and post-dominance info.
DifferentiableActivityCollection &parent, GenericSignature derivGenSig)
: parent(parent), derivativeGenericSignature(derivGenSig) {
analyze(parent.domInfo, parent.postDomInfo);
/// Returns the function held by the parent collection.
SILFunction &DifferentiableActivityInfo::getFunction() {
return parent.function;
/// Runs activity analysis on the function returned by `getFunction()`.
///
/// The steps, in order:
///  1. Collect inputs (function arguments without indirect results) and
///     outputs (formal results in type order).
///  2. Propagate variedness forward from the inputs, visiting blocks in
///     dominance order.
///  3. Propagate usefulness backward from the outputs, visiting blocks in
///     post-dominance order and instructions in reverse.
///
/// \param di  dominance info used to order the variedness propagation.
/// \param pdi post-dominance info used to order the usefulness propagation.
void DifferentiableActivityInfo::analyze(DominanceInfo *di,
PostDominanceInfo *pdi) {
auto &function = getFunction();
<< "Running activity analysis on @" << function.getName() << '\n');
// Inputs are just function's arguments, count `n`.
auto paramArgs = function.getArgumentsWithoutIndirectResults();
for (auto value : paramArgs)
auto &s = getADDebugStream();
s << "Inputs in @" << function.getName() << ":\n";
for (auto val : inputValues)
s << val << '\n';
// Outputs are indirect result buffers and return values, count `m`.
collectAllFormalResultsInTypeOrder(function, outputValues);
auto &s = getADDebugStream();
s << "Outputs in @" << function.getName() << ":\n";
for (auto val : outputValues)
s << val << '\n';
// Mark inputs as varied.
for (auto input : inputValues)
// Propagate varied-ness through the function in dominance order.
DominanceOrder domOrder(function.getEntryBlock(), di);
while (auto *bb = domOrder.getNext()) {
for (auto &inst : *bb) {
// `i` indexes the independent variable (input) being propagated.
for (auto i : indices(inputValues)) {
// Handle `apply`.
if (auto *ai = dyn_cast<ApplyInst>(&inst)) {
// If callee is non-varying, skip.
if (isWithoutDerivative(ai->getCallee()))
// If any argument is varied, set all direct and indirect results as
// varied.
for (auto arg : ai->getArgumentsWithoutIndirectResults()) {
if (isVaried(arg, i)) {
for (auto indRes : ai->getIndirectSILResults())
setVaried(indRes, i);
forEachApplyDirectResult(ai, [&](SILValue directResult) {
setVaried(directResult, i);
// Handle store-like instructions:
// `store`, `store_borrow`, `copy_addr`, `unconditional_checked_cast`
else if (auto *si = dyn_cast<INST##Inst>(&inst)) { \
if (isVaried(si->getSrc(), i)) \
propagateVariedInwardsThroughProjections(si->getDest(), i); \
// Handle `tuple_element_addr`.
else if (auto *teai = dyn_cast<TupleElementAddrInst>(&inst)) {
if (isVaried(teai->getOperand(), i)) {
auto projType = teai->getType().getASTType();
if (derivativeGenericSignature && projType->hasArchetype())
projType = derivativeGenericSignature->getCanonicalTypeInContext(
if (projType->getAutoDiffAssociatedTangentSpace(
setVaried(teai, i);
// Handle `struct_extract` and `struct_element_addr` instructions.
// - If the field is marked `@noDerivative`, do not set the result as
// varied because it is not in the set of differentiable variables.
// - Otherwise, propagate variedness from operand to result as usual.
else if (auto *sei = dyn_cast<INST##Inst>(&inst)) { \
if (isVaried(sei->getOperand(), i) && \
!sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>()) \
setVaried(sei, i); \
// Handle `br`.
else if (auto *bi = dyn_cast<BranchInst>(&inst)) {
// A varied branch operand makes the matching destination argument varied.
for (auto &op : bi->getAllOperands())
if (isVaried(op.get(), i))
setVaried(bi->getArgForOperand(&op), i);
// Handle `cond_br`.
else if (auto *cbi = dyn_cast<CondBranchInst>(&inst)) {
// True-side operands map positionally onto the true block's arguments.
for (unsigned opIdx : indices(cbi->getTrueOperands())) {
auto &op = cbi->getTrueOperands()[opIdx];
if (isVaried(op.get(), i))
setVaried(cbi->getTrueBB()->getArgument(opIdx), i);
// False-side operands map positionally onto the false block's arguments.
for (unsigned opIdx : indices(cbi->getFalseOperands())) {
auto &op = cbi->getFalseOperands()[opIdx];
if (isVaried(op.get(), i))
setVaried(cbi->getFalseBB()->getArgument(opIdx), i);
// Handle `switch_enum`.
else if (auto *sei = dyn_cast<SwitchEnumInst>(&inst)) {
// A varied enum operand makes every successor block argument varied.
if (isVaried(sei->getOperand(), i))
for (auto *succBB : sei->getSuccessorBlocks())
for (auto *arg : succBB->getArguments())
setVaried(arg, i);
// Handle everything else.
else {
// Any varied operand makes all results of the instruction varied.
for (auto &op : inst.getAllOperands())
if (isVaried(op.get(), i))
for (auto result : inst.getResults())
setVaried(result, i);
// Mark differentiable outputs as useful.
for (auto output : outputValues) {
// If the output has an address or class type, propagate usefulness
// recursively.
if (output->getType().isAddress() ||
propagateUsefulThroughBuffer(output, usefulValueSets.size() - 1);
// Otherwise, just mark the output as useful.
setUseful(output, usefulValueSets.size() - 1);
// Propagate usefulness through the function in post-dominance order.
PostDominanceOrder postDomOrder(&*function.findReturnBB(), pdi);
while (auto *bb = postDomOrder.getNext()) {
for (auto &inst : llvm::reverse(*bb)) {
// `i` indexes the dependent variable (output) being propagated.
for (auto i : indices(outputValues)) {
// Handle indirect results in `apply`.
if (auto *ai = dyn_cast<ApplyInst>(&inst)) {
if (isWithoutDerivative(ai->getCallee()))
// If `res` is useful, mark every non-indirect-result argument useful.
auto checkAndSetUseful = [&](SILValue res) {
if (isUseful(res, i))
for (auto arg : ai->getArgumentsWithoutIndirectResults())
setUseful(arg, i);
for (auto dirRes : ai->getResults())
for (auto indRes : ai->getIndirectSILResults())
auto paramInfos = ai->getSubstCalleeConv().getParameters();
// NOTE(review): this loop variable `i` appears to shadow the enclosing
// output index `i` -- confirm that is intended.
for (auto i : indices(paramInfos))
if (paramInfos[i].isIndirectInOut())
// Handle store-like instructions:
// `store`, `store_borrow`, `copy_addr`, `unconditional_checked_cast`
else if (auto *si = dyn_cast<INST##Inst>(&inst)) { \
if (isUseful(si->getDest(), i)) \
PROPAGATE(si->getSrc(), i); \
PROPAGATE_USEFUL_THROUGH_STORE(CopyAddr, propagateUsefulThroughBuffer)
// Handle struct element extraction, skipping `@noDerivative` fields:
// `struct_extract`, `struct_element_addr`.
else if (auto *sei = dyn_cast<INST##Inst>(&inst)) { \
if (isUseful(sei, i)) { \
auto hasNoDeriv = sei->getField()->getAttrs() \
.hasAttribute<NoDerivativeAttr>(); \
if (!hasNoDeriv) \
PROPAGATE(sei->getOperand(), i); \
} \
// Handle everything else.
else if (llvm::any_of(inst.getResults(),
[&](SILValue res) { return isUseful(res, i); })) {
for (auto &op : inst.getAllOperands()) {
auto value = op.get();
if (value->getType().isAddress())
propagateUsefulThroughBuffer(value, i);
setUseful(value, i);
// Propagate usefulness from basic block arguments to incoming phi values.
for (auto i : indices(outputValues)) {
for (auto *arg : bb->getArguments()) {
if (isUseful(arg, i)) {
SmallVector<SILValue, 4> incomingValues;
for (auto incomingValue : incomingValues)
setUseful(incomingValue, i);
/// If `value` comes from the allocate-uninitialized-array intrinsic, marks the
/// array produced by destructuring `value` as varied with respect to the
/// independent variable at `independentVariableIndex`. Does nothing for any
/// other value.
void DifferentiableActivityInfo::setVariedAcrossArrayInitialization(
    SILValue value, unsigned independentVariableIndex) {
  if (!getAllocateUninitializedArrayIntrinsic(value))
    return;
  for (auto *operand : value->getUses()) {
    auto *destructure = dyn_cast<DestructureTupleInst>(operand->getUser());
    if (!destructure)
      continue;
    // The first tuple field of the intrinsic's return value is the array.
    setVaried(destructure->getResult(0), independentVariableIndex);
  }
}
/// If `value` comes from the allocate-uninitialized-array intrinsic, marks the
/// source of every element store into the array's storage as useful with
/// respect to the dependent variable at `dependentVariableIndex`. Does nothing
/// for any other value.
///
/// Users of the intrinsic's `RawPointer` result that are not
/// `pointer_to_address` instructions are skipped, rather than being assumed to
/// have a first result.
void DifferentiableActivityInfo::setUsefulAcrossArrayInitialization(
    SILValue value, unsigned dependentVariableIndex) {
  // Array initializer syntax is lowered to an intrinsic and one or more
  // stores to a `RawPointer` returned by the intrinsic.
  auto uai = getAllocateUninitializedArrayIntrinsic(value);
  if (!uai)
    return;
  for (auto *tupleUse : value->getUses()) {
    auto *dti = dyn_cast<DestructureTupleInst>(tupleUse->getUser());
    if (!dti)
      continue;
    // The second tuple field of the return value is the `RawPointer`.
    for (auto *pointerUse : dti->getResult(1)->getUses()) {
      // The `RawPointer` passes through a `pointer_to_address`. That
      // instruction's first use is a `store` whose src is useful; its
      // subsequent uses are `index_addr`s whose only use is a useful `store`.
      // Check the user kind explicitly: calling `getResult(0)` on a user
      // without results would be invalid.
      auto *ptai = dyn_cast<PointerToAddressInst>(pointerUse->getUser());
      if (!ptai)
        continue;
      for (auto *addressUse : ptai->getUses()) {
        auto *addressUser = addressUse->getUser();
        if (auto *si = dyn_cast<StoreInst>(addressUser)) {
          setUseful(si->getSrc(), dependentVariableIndex);
        } else if (auto *iai = dyn_cast<IndexAddrInst>(addressUser)) {
          for (auto *elementUse : iai->getUses())
            if (auto *si = dyn_cast<StoreInst>(elementUse->getUser()))
              setUseful(si->getSrc(), dependentVariableIndex);
        }
      }
    }
  }
}
/// Marks `value` as varied for the given independent variable. The visible
/// body forwards to the array-initialization helper, which handles values
/// produced by the allocate-uninitialized-array intrinsic.
void DifferentiableActivityInfo::setVaried(SILValue value,
unsigned independentVariableIndex) {
setVariedAcrossArrayInitialization(value, independentVariableIndex);
/// Marks `value` as useful for the given dependent variable. The visible body
/// forwards to the array-initialization helper, which handles values produced
/// by the allocate-uninitialized-array intrinsic.
void DifferentiableActivityInfo::setUseful(SILValue value,
unsigned dependentVariableIndex) {
setUsefulAcrossArrayInitialization(value, dependentVariableIndex);
/// Marks `value` as varied for the given independent variable and then walks
/// to the operands of its defining instruction. The struct-element check
/// tests for `@noDerivative` fields. The walk has a guard for values with no
/// defining instruction or defined by an `apply`.
void DifferentiableActivityInfo::propagateVariedInwardsThroughProjections(
SILValue value, unsigned independentVariableIndex) {
if (auto *sei = dyn_cast<INST##Inst>(value)) \
if (sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>()) \
setVaried(value, independentVariableIndex);
auto *inst = value->getDefiningInstruction();
if (!inst || isa<ApplyInst>(inst))
// Standard propagation.
for (auto &op : inst->getAllOperands())
op.get(), independentVariableIndex);
/// Marks the buffer `value` as useful for the given dependent variable and
/// recursively propagates usefulness in two directions: to address-typed
/// operands of the defining instruction, and to users that are address
/// projections or `begin_access` instructions.
void DifferentiableActivityInfo::propagateUsefulThroughBuffer(
SILValue value, unsigned dependentVariableIndex) {
assert(value->getType().isAddress() ||
// Check whether value is already useful to prevent infinite recursion.
if (isUseful(value, dependentVariableIndex))
setUseful(value, dependentVariableIndex);
// Walk up: address-typed operands of the defining instruction.
if (auto *inst = value->getDefiningInstruction())
for (auto &operand : inst->getAllOperands())
if (operand.get()->getType().isAddress())
propagateUsefulThroughBuffer(operand.get(), dependentVariableIndex);
// Recursively propagate usefulness through users that are projections or
// `begin_access` instructions.
for (auto use : value->getUses()) {
for (auto res : use->getUser()->getResults()) {
if (auto *sei = dyn_cast<INST##Inst>(res)) \
if (sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>()) \
if (Projection::isAddressProjection(res) || isa<BeginAccessInst>(res))
propagateUsefulThroughBuffer(res, dependentVariableIndex);
/// Returns true if `value` has been recorded as varied with respect to the
/// independent variable at `independentVariableIndex`. The index must be in
/// range (asserted).
bool DifferentiableActivityInfo::isVaried(
    SILValue value, unsigned independentVariableIndex) const {
  assert(independentVariableIndex < variedValueSets.size() &&
         "Independent variable index out of range");
  return variedValueSets[independentVariableIndex].count(value);
}
/// Returns true if `value` is varied with respect to at least one of the
/// parameter indices in `parameterIndices`.
bool DifferentiableActivityInfo::isVaried(
    SILValue value, IndexSubset *parameterIndices) const {
  return llvm::any_of(parameterIndices->getIndices(), [&](unsigned paramIdx) {
    return isVaried(value, paramIdx);
  });
}
/// Returns true if `value` has been recorded as useful with respect to the
/// dependent variable at `dependentVariableIndex`. The index must be in range
/// (asserted).
bool DifferentiableActivityInfo::isUseful(
    SILValue value, unsigned dependentVariableIndex) const {
  assert(dependentVariableIndex < usefulValueSets.size() &&
         "Dependent variable index out of range");
  return usefulValueSets[dependentVariableIndex].count(value);
}
/// Returns true if `value` is active for `indices`, i.e. both varied with
/// respect to `indices.parameters` and useful with respect to
/// `indices.source`.
bool DifferentiableActivityInfo::isActive(
    SILValue value, const SILAutoDiffIndices &indices) const {
  if (!isVaried(value, indices.parameters))
    return false;
  return isUseful(value, indices.source);
}
/// Returns the activity flags of `value` for `indices`: `Varied` is set when
/// the value is varied with respect to `indices.parameters`, and `Useful` is
/// set when it is useful with respect to `indices.source`.
Activity DifferentiableActivityInfo::getActivity(
    SILValue value, const SILAutoDiffIndices &indices) const {
  const bool varied = isVaried(value, indices.parameters);
  const bool useful = isUseful(value, indices.source);
  Activity flags;
  if (varied)
    flags |= ActivityFlags::Varied;
  if (useful)
    flags |= ActivityFlags::Useful;
  return flags;
}
/// Returns the union of the activity flags of every result of `inst` for
/// `indices`. An instruction without results yields an empty flag set.
Activity DifferentiableActivityInfo::getActivity(
    SILInstruction *inst, const SILAutoDiffIndices &indices) const {
  Activity combined;
  auto results = inst->getResults();
  for (auto it = results.begin(), end = results.end(); it != end; ++it)
    combined |= getActivity(*it, indices);
  return combined;
}
/// Prints the activity of `value` for `indices` to `s` as a bracketed label
/// (`NONE`, `VARIED`, `USEFUL` or `ACTIVE`) followed by the value itself.
static void dumpActivityInfo(SILValue value,
                             const SILAutoDiffIndices &indices,
                             const DifferentiableActivityInfo &activityInfo,
                             llvm::raw_ostream &s = llvm::dbgs()) {
  const auto rawActivity = activityInfo.getActivity(value, indices).toRaw();
  // Any raw value outside the four known ones prints an empty label.
  const char *label = "";
  if (rawActivity == 0)
    label = "NONE";
  else if (rawActivity == static_cast<unsigned>(ActivityFlags::Varied))
    label = "VARIED";
  else if (rawActivity == static_cast<unsigned>(ActivityFlags::Useful))
    label = "USEFUL";
  else if (rawActivity == static_cast<unsigned>(ActivityFlags::Active))
    label = "ACTIVE";
  s << '[' << label << "] " << value;
}
/// Prints the activity of every basic block argument and every instruction
/// result in `fn` for `indices` to `s`, grouped under a `bbN:` heading per
/// block.
static void dumpActivityInfo(SILFunction &fn,
const SILAutoDiffIndices &indices,
const DifferentiableActivityInfo &activityInfo,
llvm::raw_ostream &s = llvm::dbgs()) {
s << "Activity info for " << fn.getName() << " at " << indices << '\n';
for (auto &bb : fn) {
s << "bb" << bb.getDebugID() << ":\n";
// Block arguments first, then the results of each instruction.
for (auto *arg : bb.getArguments())
dumpActivityInfo(arg, indices, activityInfo, s);
for (auto &inst : bb)
for (auto res : inst.getResults())
dumpActivityInfo(res, indices, activityInfo, s);
s << '\n';
/// If the original function doesn't have a return, it cannot be differentiated.
/// Returns true if error is emitted.
///
/// A function "has a return" here when `findReturnBB()` yields a block other
/// than `end()`. The diagnostic location is the end of the function's source
/// location.
static bool diagnoseNoReturn(ADContext &context, SILFunction *original,
DifferentiationInvoker invoker) {
if (original->findReturnBB() != original->end())
return false;
original->getLocation().getEndSourceLoc(), invoker,
return true;
/// If the original function contains unsupported control flow, emit a "control
/// flow unsupported" error at appropriate source locations. Returns true if
/// error is emitted.
/// Update as control flow support is added. Currently, branching terminators
/// other than `br`, `cond_br`, `switch_enum` are not supported.
static bool diagnoseUnsupportedControlFlow(ADContext &context,
SILFunction *original,
DifferentiationInvoker invoker) {
// A function with at most one basic block is accepted without inspection.
if (original->getBlocks().size() <= 1)
return false;
// Diagnose unsupported branching terminators.
for (auto &bb : *original) {
auto *term = bb.getTerminator();
// Supported terminators are: `br`, `cond_br`, `switch_enum`.
if (isa<BranchInst>(term) || isa<CondBranchInst>(term) ||
// If terminator is an unsupported branching terminator, emit an error.
if (term->isBranch()) {
term, invoker, diag::autodiff_control_flow_not_supported);
return true;
return false;
/// Check whether the given requirements are satisfied, with the given
/// derivative generic signature (containing requirements), original function,
/// and substitution map. Returns true if error is emitted.
///
/// Each requirement's types are substituted with `substMap` before being
/// checked. All unsatisfied requirements are collected first and then
/// reported together at `loc`, printed as a comma-separated list.
static bool diagnoseUnsatisfiedRequirements(ADContext &context,
GenericSignature derivativeGenSig,
SILFunction *original,
SubstitutionMap substMap,
DifferentiationInvoker invoker,
SourceLoc loc) {
// If there are no derivative requirements, return false.
if (!derivativeGenSig)
return false;
auto requirements = derivativeGenSig->getRequirements();
if (requirements.empty())
return false;
// Iterate through all requirements and check whether they are satisfied.
auto *swiftModule = context.getModule().getSwiftModule();
SmallVector<Requirement, 2> unsatisfiedRequirements;
for (auto req : requirements) {
auto firstType = req.getFirstType();
Type secondType;
// Substitute first and second types using the given substitution map,
// looking up conformances in the current module, if possible.
if (auto substFirstType =
LookUpConformanceInModule(swiftModule))) {
firstType = substFirstType;
// Layout requirements are skipped here; this branch reads the second type
// of every other requirement kind.
if (req.getKind() != RequirementKind::Layout) {
secondType = req.getSecondType();
if (auto substSecondType =
LookUpConformanceInModule(swiftModule))) {
secondType = substSecondType;
switch (req.getKind()) {
// Check layout requirements.
case RequirementKind::Layout: {
auto layout = req.getLayoutConstraint();
switch (layout->getKind()) {
case LayoutConstraintKind::Class:
if (!firstType->satisfiesClassConstraint())
// TODO: Check other layout requirements. Note that `@differentiable`
// attribute type-checking does not yet support layout requirements in
// where clauses; layout requirements in derivative generic signatures
// can be formed only from `differentiable_function` instructions whose
// original function operand is generic with layout requirements.
// Check same type requirements.
case RequirementKind::SameType:
// If the first type does not equal the second type, then record the
// unsatisfied requirement.
if (!firstType->isEqual(secondType))
// Check superclass requirements.
case RequirementKind::Superclass: {
// If the second type is not an exact superclass of the first type, then
// record the unsatisfied requirement.
if (!secondType->isExactSuperclassOf(firstType))
// Check conformance requirements.
case RequirementKind::Conformance: {
// The protocol is taken from the unsubstituted second type of the
// requirement.
auto protocolType = req.getSecondType()->castTo<ProtocolType>();
auto protocol = protocolType->getDecl();
assert(protocol && "Expected protocol in generic signature requirement");
// If the first type does not conform to the second type in the current
// module, then record the unsatisfied requirement.
if (!swiftModule->lookupConformance(firstType, protocol))
if (unsatisfiedRequirements.empty())
return false;
// Diagnose unsatisfied requirements.
std::string reqText;
llvm::raw_string_ostream stream(reqText);
[&](Requirement req) { req.print(stream, PrintOptions()); },
[&] { stream << ", "; });
loc, invoker, diag::autodiff_function_assoc_func_unmet_requirements,
return true;
// Code emission utilities
/// Given a value, extracts all elements to `results` from this value if it has
/// a tuple type. Otherwise, add this value directly to `results`.
///
/// When `builder` has ownership enabled, the tuple is split with a single
/// `destructure_tuple`; otherwise one `tuple_extract` is created per element.
static void extractAllElements(SILValue value, SILBuilder &builder,
SmallVectorImpl<SILValue> &results) {
auto tupleType = value->getType().getAs<TupleType>();
if (!tupleType) {
if (builder.hasOwnership()) {
auto *dti = builder.createDestructureTuple(value.getLoc(), value);
results.append(dti->getResults().begin(), dti->getResults().end());
for (auto i : range(tupleType->getNumElements()))
results.push_back(builder.createTupleExtract(value.getLoc(), value, i));
/// Combines `elements` into one value. A single element is returned as is;
/// any other number of elements is wrapped in a `tuple` instruction created
/// with `builder` at `loc`.
static SILValue joinElements(ArrayRef<SILValue> elements, SILBuilder &builder,
                             SILLocation loc) {
  if (elements.size() != 1)
    return builder.createTuple(loc, elements);
  return elements.front();
}
/// Given an apply site, emit copies of all parameters and place them in
/// `copiedArgs`. Any buffers that need to be destroyed will be added to
/// `newArgsToDestroy`. Any new buffers that need to be deallocated will be
/// added to `newBuffersToDealloc`. This helper is used for duplicating an
/// apply site.
///
/// Copies are emitted immediately before the apply site's instruction.
static void copyParameterArgumentsForApply(
ApplySite applySite, SmallVectorImpl<SILValue> &copiedArgs,
SmallVectorImpl<SILValue> &newArgsToDestroy,
SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc) {
auto &s = getADDebugStream() << "Copying arguments from apply site: ";
auto loc = applySite.getLoc();
// The builder inserts before the apply site's instruction.
SILBuilder copyBuilder(applySite.getInstruction());
for (auto &argOperand : applySite.getArgumentOperands()) {
auto arg = argOperand.get();
auto argConv = applySite.getArgumentConvention(argOperand);
auto collectNewArg = [&](SILValue newArg) {
if (argConv.isGuaranteedConvention() &&
argConv != SILArgumentConvention::Indirect_InoutAliasable)
// Copy the argument if it's to be owned by the newly created closure.
// Objects are to be retained.
if (arg->getType().isObject()) {
auto newArg = copyBuilder.emitCopyValueOperation(loc, arg);
// Addresses depend on argument conventions.
// If the argument is an aliasable inout reference, do not copy the
// argument since it's a `@noescape` capture.
if (argConv == SILArgumentConvention::Indirect_InoutAliasable) {
// Otherwise, it must be address-only. Create a new buffer and perform
// `copy_addr`.
auto *argCopy = copyBuilder.createAllocStack(loc, arg->getType());
copyBuilder.createCopyAddr(loc, arg, argCopy, IsNotTake,
/// When a function value is used in an instruction (usually `apply`), there's
/// some conversion instruction in between, e.g. `thin_to_thick_function`. Given
/// a new function value and an old function value, this helper function
/// recursively converts the new function just like how the old function is
/// converted. If the new function's generic signature is specified, it is used
/// to create substitution maps for reapplied `partial_apply` instructions.
///
/// The conversions handled in the visible body are `thin_to_thick_function`
/// and `partial_apply`; reaching the end without a match is treated as
/// unreachable.
static SILValue
SILValue newFunc, SILValue oldFunc, SILValue oldConvertedFunc,
SILBuilder &builder, SILLocation loc,
SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc,
GenericSignature newFuncGenSig = GenericSignature()) {
// If the old func is the new func, then there's no conversion.
if (oldFunc == oldConvertedFunc)
return newFunc;
// Handle a few instruction cases.
// thin_to_thick_function
if (auto *tttfi = dyn_cast<ThinToThickFunctionInst>(oldConvertedFunc)) {
// Convert the operand first, then re-wrap the result.
auto innerNewFunc = reapplyFunctionConversion(
newFunc, oldFunc, tttfi->getOperand(), builder, loc,
newBuffersToDealloc, newFuncGenSig);
auto operandFnTy = innerNewFunc->getType().castTo<SILFunctionType>();
auto thickTy = operandFnTy->getWithRepresentation(
auto silTy = SILType::getPrimitiveObjectType(thickTy);
return builder.createThinToThickFunction(loc, innerNewFunc, silTy);
// partial_apply
if (auto *pai = dyn_cast<PartialApplyInst>(oldConvertedFunc)) {
// Copy the captured arguments so the new `partial_apply` gets its own.
SmallVector<SILValue, 8> newArgs;
SmallVector<SILValue, 1> newArgsToDestroy;
copyParameterArgumentsForApply(pai, newArgs, newArgsToDestroy,
auto innerNewFunc = reapplyFunctionConversion(
newFunc, oldFunc, pai->getCallee(), builder, loc, newBuffersToDealloc,
// If new function's generic signature is specified, use it to create
// substitution map for reapplied `partial_apply` instruction.
auto substMap = !newFuncGenSig
? pai->getSubstitutionMap()
: SubstitutionMap::get(
newFuncGenSig, QuerySubstitutionMap{pai->getSubstitutionMap()},
return builder.createPartialApply(loc, innerNewFunc, substMap, newArgs,
llvm_unreachable("Unhandled function conversion instruction");
/// Emits a reference to a derivative function of `original`, differentiated
/// with respect to a superset of `desiredIndices`. Returns the `SILValue` for
/// the derivative function and the actual indices that the derivative function
/// is with respect to.
/// Returns `None` on failure, signifying that a diagnostic has been emitted.
/// Creates new differentiation tasks, if necessary, using `invoker` as the
/// invoker. Calls `taskCallback` for all newly-created tasks (but may also call
/// `taskCallback` for already-existing tasks), so that the caller can make sure
/// that the task actually gets executed.
/// FIXME: This is too complicated and needs to be rewritten.
///
/// The cases are tried in this order: a `@differentiable` function value, a
/// `function_ref`, a `witness_method`, and a `class_method`. Anything else
/// produces the general opaque-function error.
static Optional<std::pair<SILValue, SILAutoDiffIndices>>
ADContext &context, SILBuilder &builder, SILAutoDiffIndices desiredIndices,
AutoDiffDerivativeFunctionKind kind, SILValue original,
DifferentiationInvoker invoker,
SmallVectorImpl<AllocStackInst *> &newBuffersToDealloc) {
SILValue functionSource = original;
// If `original` is itself a `DifferentiableFunctionExtractInst` whose kind
// matches the given kind and desired differentiation parameter indices, simply
// extract the derivative function of its function operand, retain the
// derivative function, and return it.
if (auto *inst = original->getDefiningInstruction())
if (auto *dfei = dyn_cast<DifferentiableFunctionExtractInst>(inst))
if (dfei->getExtractee() ==
functionSource = dfei->getFunctionOperand();
// If `functionSource` is a `@differentiable` function, just extract the
// derivative function.
// NOTE(review): `castTo` is normally an asserting cast, so this condition may
// always hold -- confirm whether `getAs` was intended.
if (auto diffableFnType =
functionSource->getType().castTo<SILFunctionType>()) {
if (diffableFnType->isDifferentiable()) {
// Every desired parameter index must be among the function type's
// differentiation parameter indices.
auto paramIndices = diffableFnType->getDifferentiationParameterIndices();
for (auto i : desiredIndices.parameters->getIndices()) {
if (!paramIndices->contains(i)) {
context.emitNondifferentiabilityError(functionSource, invoker,
return None;
// Borrow the function, extract the derivative, copy it, then end the borrow.
auto borrowedDiffFunc = builder.emitBeginBorrowOperation(
functionSource.getLoc(), functionSource);
SILValue derivativeFn = builder.createDifferentiableFunctionExtract(
borrowedDiffFunc.getLoc(), kind, borrowedDiffFunc);
derivativeFn =
builder.emitCopyValueOperation(functionSource.getLoc(), derivativeFn);
builder.emitEndBorrowOperation(functionSource.getLoc(), borrowedDiffFunc);
SILAutoDiffIndices indices(0, desiredIndices.parameters);
return std::make_pair(derivativeFn, indices);
// Find local function reference.
if (auto *originalFRI =
peerThroughFunctionConversions<FunctionRefInst>(original)) {
auto loc = originalFRI->getLoc();
auto *originalFn = originalFRI->getReferencedFunctionOrNull();
// Attempt to look up a `[differentiable]` attribute that minimally
// satisfies the specified indices.
// TODO(TF-482): Change `lookUpMinimalDifferentiableAttr` to additionally
// check whether `[differentiable]` attribute generic requirements are
// satisfied.
auto *minimalAttr =
context.lookUpMinimalDifferentiableAttr(originalFn, desiredIndices);
if (!minimalAttr) {
// If the function is intentionally marked as being opaque to
// differentiation, then we should not create a task for it.
if (originalFn->hasSemanticsAttr("autodiff.opaque")) {
context.emitNondifferentiabilityError(original, invoker,
return None;
// Check and diagnose non-differentiable arguments.
auto originalFnTy = originalFn->getLoweredFunctionType();
for (unsigned paramIndex : range(originalFnTy->getNumParameters())) {
if (desiredIndices.isWrtParameter(paramIndex) &&
.isDifferentiable(context.getModule())) {
auto diag = context.emitNondifferentiabilityError(
original, invoker, diag::autodiff_nondifferentiable_argument);
return None;
// Check and diagnose non-differentiable results.
if (!originalFnTy->getResults()[desiredIndices.source]
.isDifferentiable(context.getModule())) {
original, invoker, diag::autodiff_nondifferentiable_result);
return None;
// Check and diagnose external declarations.
if (originalFn->isExternalDeclaration()) {
original, invoker,
return None;
// Sanity check passed. Create a new `[differentiable]` attribute and
// process it.
GenericSignature contextualDerivativeGenSig = GenericSignature();
if (invoker.getKind() ==
contextualDerivativeGenSig = invoker.getIndirectDifferentiation().second
auto *newAttr = context.getOrCreateDifferentiableAttr(
originalFn, desiredIndices, contextualDerivativeGenSig);
if (context.processDifferentiableAttribute(originalFn, newAttr, invoker))
return None;
minimalAttr = newAttr;
// TODO(TF-482): Move generic requirement checking logic to
// `lookUpMinimalDifferentiableAttr`.
// Get the substitution map for checking unmet generic requirements.
// By default, use the forwarding substitution map of the original function.
// If the original callee is a `partial_apply` or `apply` instruction, use
// its substitution map instead.
auto substMap = original->getFunction()->getForwardingSubstitutionMap();
if (auto *pai = dyn_cast<PartialApplyInst>(original)) {
substMap = pai->getSubstitutionMap();
} else if (auto *ai = dyn_cast<ApplyInst>(original)) {
substMap = ai->getSubstitutionMap();
if (diagnoseUnsatisfiedRequirements(
context, minimalAttr->getDerivativeGenericSignature(), originalFn,
substMap, invoker, original.getLoc().getSourceLoc()))
return None;
if (context.processDifferentiableAttribute(
originalFn, minimalAttr, invoker))
return None;
// Look up the derivative function by the name recorded on the attribute.
SILFunction *derivativeFn = nullptr;
switch (kind) {
case AutoDiffDerivativeFunctionKind::JVP:
assert(!minimalAttr->getJVPName().empty() && "Expected JVP name");
derivativeFn = context.getModule().lookUpFunction(minimalAttr->getJVPName());
case AutoDiffDerivativeFunctionKind::VJP:
assert(!minimalAttr->getVJPName().empty() && "Expected VJP name");
derivativeFn = context.getModule().lookUpFunction(minimalAttr->getVJPName());
auto *derivativeFnRef = builder.createFunctionRef(loc, derivativeFn);
// FIXME(TF-201): Handle direct differentiation of reabstraction thunks.
// Tentative solution: clone a new reabstraction thunk where function
// argument has a `@differentiable` function type.
if (originalFn->isThunk() == IsReabstractionThunk) {
// Handle here.
// Re-apply the conversions that sat between the `function_ref` and `original`.
auto convertedRef = reapplyFunctionConversion(
derivativeFnRef, originalFRI, original, builder, loc,
return std::make_pair(convertedRef, minimalAttr->getIndices());
// Find witness method retrieval.
if (auto *witnessMethod =
peerThroughFunctionConversions<WitnessMethodInst>(original)) {
auto loc = witnessMethod->getLoc();
auto requirementDeclRef = witnessMethod->getMember();
auto *requirementDecl = requirementDeclRef.getDecl();
auto witnessMethodType = witnessMethod->getType().castTo<SILFunctionType>();
// If requirement declaration does not have any `@differentiable`
// attributes, produce an error.
if (!requirementDecl->getAttrs().hasAttribute<DifferentiableAttr>()) {
original, invoker, diag::autodiff_protocol_member_not_differentiable);
return None;
// Get the minimal `@differentiable` attribute and parameter index subset.
const DifferentiableAttr *minimalAttr;
IndexSubset *minimalParamIndexSet;
std::tie(minimalAttr, minimalParamIndexSet) =
requirementDeclRef, witnessMethodType, desiredIndices);
SILAutoDiffIndices minimalIndices(/*source*/ 0, minimalParamIndexSet);
// If minimal `@differentiable` attribute does not exist, then no attribute
// exists with a superset of the desired indices. Produce an error.
if (!minimalAttr) {
original, invoker,
return None;
// Emit a `witness_method` instruction for the derivative function.
auto originalType = witnessMethod->getType().castTo<SILFunctionType>();
auto assocType = originalType->getAutoDiffDerivativeFunctionType(
minimalIndices.parameters, minimalIndices.source,
kind, context.getTypeConverter(),
auto *autoDiffFuncId = AutoDiffDerivativeFunctionIdentifier::get(
kind, minimalAttr->getParameterIndices(), context.getASTContext());
auto *ref = builder.createWitnessMethod(
loc, witnessMethod->getLookupType(), witnessMethod->getConformance(),
auto convertedRef =
reapplyFunctionConversion(ref, witnessMethod, original, builder, loc,
return std::make_pair(convertedRef, minimalIndices);
// Find class method.
if (auto *classMethodInst =
peerThroughFunctionConversions<ClassMethodInst>(original)) {
auto loc = classMethodInst->getLoc();
auto methodDeclRef = classMethodInst->getMember();
auto *methodDecl = methodDeclRef.getDecl();
auto classMethodType = classMethodInst->getType().castTo<SILFunctionType>();
// If method declaration does not have any `@differentiable` attributes,
// produce an error.
if (!methodDecl->getAttrs().hasAttribute<DifferentiableAttr>()) {
original, invoker, diag::autodiff_class_member_not_differentiable);
return None;
// Get the minimal `@differentiable` attribute and parameter index subset.
const DifferentiableAttr *minimalAttr;
IndexSubset *minimalParamIndexSet;
std::tie(minimalAttr, minimalParamIndexSet) =
methodDeclRef, classMethodType, desiredIndices);
SILAutoDiffIndices minimalIndices(/*source*/ 0, minimalParamIndexSet);
// If minimal `@differentiable` attribute does not exist, then no attribute
// exists with a superset of the desired indices. Produce an error.
if (!minimalAttr) {
original, invoker,
return None;
// Emit a `class_method` instruction for the derivative function.
auto originalType = classMethodInst->getType().castTo<SILFunctionType>();
auto assocType = originalType->getAutoDiffDerivativeFunctionType(
minimalIndices.parameters, minimalIndices.source,
kind, context.getTypeConverter(),
auto *autoDiffFuncId = AutoDiffDerivativeFunctionIdentifier::get(
kind, minimalAttr->getParameterIndices(),
auto *ref = builder.createClassMethod(
loc, classMethodInst->getOperand(),
auto convertedRef =
reapplyFunctionConversion(ref, classMethodInst, original, builder, loc,
return std::make_pair(convertedRef, minimalIndices);
// Emit the general opaque function error.
context.emitNondifferentiabilityError(original, invoker,
return None;
/// Emit a zero value into the given buffer access by calling
/// `AdditiveArithmetic.zero`. The given type must conform to
/// `AdditiveArithmetic`.
///
/// The emitted sequence is a `witness_method` for the `zero` getter, a thick
/// `metatype` of `type`, and an `apply` of the getter with `bufferAccess` and
/// the metatype as arguments.
static void emitZeroIntoBuffer(
SILBuilder &builder, CanType type, SILValue bufferAccess,
SILLocation loc) {
auto &astCtx = builder.getASTContext();
auto *swiftMod = builder.getModule().getSwiftModule();
auto &typeConverter = builder.getModule().Types;
// Look up conformance to `AdditiveArithmetic`.
auto *additiveArithmeticProto =
auto confRef = swiftMod->lookupConformance(type, additiveArithmeticProto);
assert(confRef.hasValue() && "Missing conformance to `AdditiveArithmetic`");
// Look up `AdditiveArithmetic.zero`.
auto zeroDeclLookup = additiveArithmeticProto->lookupDirect(astCtx.Id_zero);
auto *zeroDecl = cast<VarDecl>(zeroDeclLookup.front());
// The property is read through its getter accessor.
auto *accessorDecl = zeroDecl->getAccessor(AccessorKind::Get);
SILDeclRef accessorDeclRef(accessorDecl, SILDeclRef::Kind::Func);
auto silFnType = typeConverter.getConstantType(accessorDeclRef);
// %wm = witness_method ...
auto *getter = builder.createWitnessMethod(
loc, type, *confRef, accessorDeclRef, silFnType);
// %metatype = metatype $T
auto metatypeType = CanMetatypeType::get(
type, MetatypeRepresentation::Thick);
auto metatype = builder.createMetatype(
loc, SILType::getPrimitiveObjectType(metatypeType));
auto subMap = SubstitutionMap::getProtocolSubstitutions(
additiveArithmeticProto, type, *confRef);
// The getter is applied with the result buffer first, then the metatype.
builder.createApply(loc, getter, subMap, {bufferAccess, metatype},
/*isNonThrowing*/ false);
builder.emitDestroyValueOperation(loc, getter);
// Thunk helpers
// These helpers are copied/adapted from SILGen. They should be refactored and
// moved to a shared location.
/// Compute the generic signature of a thunk created inside `fn`.
///
/// When `openedExistential` is null, the signature, generic environment and
/// forwarding substitutions of `fn` are reused as-is. Otherwise a new
/// signature is built that contains a fresh generic parameter standing in for
/// the opened existential.
///
/// Outputs written through the reference parameters:
/// - `genericEnv`: the generic environment of the returned signature.
/// - `contextSubs`: substitutions mapping the caller's archetypes to the
///   thunk's archetypes.
/// - `interfaceSubs`: substitutions mapping interface types to the caller's
///   archetypes.
/// - `newArchetype`: the archetype of the new generic parameter (only assigned
///   on the opened-existential path).
static CanGenericSignature
buildThunkSignature(SILFunction *fn,
bool inheritGenericSig,
OpenedArchetypeType *openedExistential,
GenericEnvironment *&genericEnv,
SubstitutionMap &contextSubs,
SubstitutionMap &interfaceSubs,
ArchetypeType *&newArchetype) {
// If there's no opened existential, we just inherit the generic environment
// from the parent function.
if (openedExistential == nullptr) {
auto genericSig = fn->getLoweredFunctionType()->getGenericSignature();
genericEnv = fn->getGenericEnvironment();
interfaceSubs = fn->getForwardingSubstitutionMap();
contextSubs = interfaceSubs;
return genericSig;
auto &ctx = fn->getASTContext();
GenericSignatureBuilder builder(ctx);
// Add the existing generic signature.
// `depth` is the depth at which the new generic parameter is created; it
// stays 0 unless an existing signature is inherited.
int depth = 0;
if (inheritGenericSig) {
if (auto genericSig =
fn->getLoweredFunctionType()->getGenericSignature()) {
// Place the new parameter one level deeper than the last existing one.
depth = genericSig->getGenericParams().back()->getDepth() + 1;
// Add a new generic parameter to replace the opened existential.
auto *newGenericParam = GenericTypeParamType::get(depth, 0, ctx);
Requirement newRequirement(RequirementKind::Conformance, newGenericParam,
auto source =
builder.addRequirement(newRequirement, source, nullptr);
auto genericSig = std::move(builder).computeGenericSignature(
SourceLoc(), /*allowConcreteGenericParams=*/true);
genericEnv = genericSig->getGenericEnvironment();
newArchetype = genericEnv->mapTypeIntoContext(newGenericParam)
// Calculate substitutions to map the caller's archetypes to the thunk's
// archetypes.
if (auto calleeGenericSig =
fn->getLoweredFunctionType()->getGenericSignature()) {
contextSubs = SubstitutionMap::get(
[&](SubstitutableType *type) -> Type {
return genericEnv->mapTypeIntoContext(type);
// Calculate substitutions to map interface types to the caller's archetypes.
interfaceSubs = SubstitutionMap::get(
[&](SubstitutableType *type) -> Type {
// The new generic parameter maps back to the opened existential; every
// other parameter maps into the context of `fn`.
if (type->isEqual(newGenericParam))
return openedExistential;
return fn->mapTypeIntoContext(type);
return genericSig->getCanonicalSignature();
/// The thunk kinds used in the differentiation transform.
enum class DifferentiationThunkKind {
/// A reabstraction thunk.
/// Reabstraction thunks transform a function-typed value to another one with
/// different parameter/result abstraction patterns. This is identical to the
/// thunks generated by SILGen.
/// An index subset thunk.
/// An index subset thunk is used to transform JVP/VJPs into a version that is
/// "wrt" fewer differentiation parameters.
/// - Differentials of thunked JVPs use zero for non-requested differentiation
///   parameters.
/// - Pullbacks of thunked VJPs discard results for non-requested
///   differentiation parameters.
/// Build the type of a function transformation thunk.
///
/// `sourceType` and `expectedType` are taken by reference and are rewritten in
/// place through `substIntoThunkContext`. `genericEnv` and `interfaceSubs` are
/// outputs that are filled in by `buildThunkSignature` when either function
/// type involves archetypes. The returned function type is expressed in terms
/// of interface types (everything is mapped out of context before it is
/// assembled).
static CanSILFunctionType buildThunkType(SILFunction *fn,
CanSILFunctionType &sourceType,
CanSILFunctionType &expectedType,
GenericEnvironment *&genericEnv,
SubstitutionMap &interfaceSubs,
bool withoutActuallyEscaping,
DifferentiationThunkKind thunkKind) {
auto &module = fn->getModule();
// Remember the unsubstituted source type; it is consulted for
// pseudogenericity further down, after `sourceType` has been rewritten.
auto origType = sourceType;
// Cannot build a reabstraction thunk without context. Ownership semantics
// on the result type are required.
if (thunkKind == DifferentiationThunkKind::Reabstraction)
// This may inherit @noescape from the expected type. The `@noescape`
// attribute is only stripped when using this type to materialize a new decl.
// Use `@convention(thin)` if:
// - Building a reabstraction thunk type.
// - Building an index subset thunk type, where the expected type has context
// (i.e. is `@convention(thick)`).
auto extInfo = expectedType->getExtInfo();
if (thunkKind == DifferentiationThunkKind::Reabstraction ||
extInfo.hasContext()) {
extInfo = extInfo.withRepresentation(
if (withoutActuallyEscaping)
extInfo = extInfo.withNoEscape(false);
// Does the thunk type involve archetypes other than opened existentials?
bool hasArchetypes = false;
// Does the thunk type involve an open existential type?
CanOpenedArchetypeType openedExistential;
// NOTE(review): the outer and the inner `dyn_cast` below both target
// `OpenedArchetypeType`, so the `else` branch that sets `hasArchetypes` looks
// unreachable. Confirm whether the outer cast was meant to be
// `dyn_cast<ArchetypeType>`.
auto archetypeVisitor = [&](CanType t) {
if (auto archetypeTy = dyn_cast<OpenedArchetypeType>(t)) {
if (auto opened = dyn_cast<OpenedArchetypeType>(archetypeTy)) {
// At most one distinct opened existential is supported.
assert((openedExistential == CanArchetypeType() ||
openedExistential == opened) &&
"one too many open existentials");
openedExistential = opened;
} else {
hasArchetypes = true;
// Use the generic signature from the context if the thunk involves
// generic parameters.
CanGenericSignature genericSig;
SubstitutionMap contextSubs;
ArchetypeType *newArchetype = nullptr;
if (expectedType->hasArchetype() || sourceType->hasArchetype()) {
genericSig = buildThunkSignature(
fn, hasArchetypes, openedExistential, genericEnv, contextSubs,
interfaceSubs, newArchetype);
// Utility function to apply contextSubs, and also replace the
// opened existential with the new archetype.
auto substIntoThunkContext = [&](CanType t) -> CanType {
return t.subst(
[&](SubstitutableType *type) -> Type {
if (CanType(type) == openedExistential)
return newArchetype;
return Type(type).subst(contextSubs);
// Rewrite both function types into the thunk's context. These assignments
// are visible to the caller because both parameters are references.
sourceType = cast<SILFunctionType>(substIntoThunkContext(sourceType));
expectedType = cast<SILFunctionType>(substIntoThunkContext(expectedType));
// If our parent function was pseudogeneric, this thunk must also be
// pseudogeneric, since we have no way to pass generic parameters.
if (genericSig)
if (origType->isPseudogeneric())
extInfo = extInfo.withIsPseudogeneric();
// Add the function type as the parameter.
auto contextConvention =
? ParameterConvention::Direct_Unowned
: ParameterConvention::Direct_Guaranteed;
SmallVector<SILParameterInfo, 4> params;
// Add reabstraction function parameter only if building a reabstraction thunk
// type.
if (thunkKind == DifferentiationThunkKind::Reabstraction)
// A source function without context is always passed `Direct_Unowned`.
params.push_back({sourceType, sourceType->getExtInfo().hasContext()
? contextConvention
: ParameterConvention::Direct_Unowned});
// Map the parameter and expected types out of context to get the interface
// type of the thunk.
SmallVector<SILParameterInfo, 4> interfaceParams;
for (auto &param : params) {
auto paramIfaceTy = param.getType()->mapTypeOutOfContext();
paramIfaceTy->getCanonicalType(genericSig), param.getConvention()));
// Yields, results and the optional error result are all taken from the
// expected type.
SmallVector<SILYieldInfo, 4> interfaceYields;
for (auto &yield : expectedType->getYields()) {
auto yieldIfaceTy = yield.getType()->mapTypeOutOfContext();
auto interfaceYield =
SmallVector<SILResultInfo, 4> interfaceResults;
for (auto &result : expectedType->getResults()) {
auto resultIfaceTy = result.getType()->mapTypeOutOfContext();
auto interfaceResult =
Optional<SILResultInfo> interfaceErrorResult;
if (expectedType->hasErrorResult()) {
auto errorResult = expectedType->getErrorResult();
auto errorIfaceTy = errorResult.getType()->mapTypeOutOfContext();
interfaceErrorResult =
// The type of the thunk function.
return SILFunctionType::get(
genericSig, extInfo, expectedType->getCoroutineKind(),
ParameterConvention::Direct_Unowned, interfaceParams, interfaceYields,
interfaceResults, interfaceErrorResult, module.getASTContext());
/// Get or create a reabstraction thunk from `fromType` to `toType`, to be
/// called in `caller`.
///
/// The thunk is looked up (or created) as a shared function under the mangled
/// reabstraction-thunk name. If the function already has a body it is returned
/// unchanged; otherwise a single-block body is emitted that adapts the thunk's
/// arguments to `fromType`, applies the function passed as the last thunk
/// argument, and adapts the call's results back to `toType`.
static SILFunction *getOrCreateReabstractionThunk(SILOptFunctionBuilder &fb,
SILModule &module,
SILLocation loc,
SILFunction *caller,
CanSILFunctionType fromType,
CanSILFunctionType toType) {
SubstitutionMap interfaceSubs;
GenericEnvironment *genericEnv = nullptr;
// `fromType` and `toType` are passed by reference and may be rewritten by
// `buildThunkType`.
auto thunkType = buildThunkType(
caller, fromType, toType, genericEnv, interfaceSubs,
/*withoutActuallyEscaping*/ false,
auto thunkDeclType =
auto fromInterfaceType = fromType->mapTypeOutOfContext()->getCanonicalType();
auto toInterfaceType = toType->mapTypeOutOfContext()->getCanonicalType();
Mangle::ASTMangler mangler;
std::string name = mangler.mangleReabstractionThunkHelper(
thunkType, fromInterfaceType, toInterfaceType,
Type(), module.getSwiftModule());
auto *thunk = fb.getOrCreateSharedFunction(
loc, name, thunkDeclType, IsBare, IsTransparent, IsSerialized,
ProfileCounter(), IsReabstractionThunk, IsNotDynamic);
// A non-empty function means the thunk body was already generated.
if (!thunk->empty())
return thunk;
auto *entry = thunk->createBasicBlock();
SILBuilder builder(entry);
SILFunctionConventions fromConv(fromType, module);
SILFunctionConventions toConv(toType, module);
// The function value to reabstract is the last thunk argument.
auto *fnArg = thunk->getArgumentsWithoutIndirectResults().back();
// Arguments for the `apply` of `fnArg`, built up in callee order.
SmallVector<SILValue, 4> arguments;
auto toArgIter = thunk->getArguments().begin();
auto useNextArgument = [&]() {
// Stack allocations made by the thunk; deallocated in reverse order before
// the return.
SmallVector<AllocStackInst *, 4> localAllocations;
auto createAllocStack = [&](SILType type) {
auto *alloc = builder.createAllocStack(loc, type);
return alloc;
// Handle indirect results.
assert(fromType->getNumResults() == toType->getNumResults());
for (unsigned resIdx : range(toType->getNumResults())) {
auto fromRes = fromConv.getResults()[resIdx];
auto toRes = toConv.getResults()[resIdx];
// No abstraction mismatch.
if (fromRes.isFormalIndirect() == toRes.isFormalIndirect()) {
// If result types are indirect, directly pass as next argument.
if (toRes.isFormalIndirect())
// Convert indirect result to direct result.
if (fromRes.isFormalIndirect()) {
SILType resultTy = fromConv.getSILType(fromRes);
auto *indRes = createAllocStack(resultTy);
// Convert direct result to indirect result.
// Increment thunk argument iterator; reabstraction handled later.
// Reabstract parameters.
assert(toType->getNumParameters() == fromType->getNumParameters());
for (unsigned paramIdx : range(toType->getNumParameters())) {
auto fromParam = fromConv.getParameters()[paramIdx];
auto toParam = toConv.getParameters()[paramIdx];
// No abstraction mismatch. Directly use next argument.
if (fromParam.isFormalIndirect() == toParam.isFormalIndirect()) {
// Convert indirect parameter to direct parameter.
if (fromParam.isFormalIndirect()) {
auto paramTy = fromConv.getSILType(fromType->getParameters()[paramIdx]);
if (!paramTy.hasArchetype())
paramTy = thunk->mapTypeIntoContext(paramTy);
auto *toArg = *toArgIter++;
// Spill the direct thunk argument into a temporary buffer.
auto *buf = createAllocStack(toArg->getType());
builder.createStore(loc, toArg, buf,
// Convert direct parameter to indirect parameter.
auto *toArg = *toArgIter++;
auto *load = builder.createLoad(loc, toArg,
// Call the wrapped function with the adapted arguments.
auto *apply = builder.createApply(
loc, fnArg, SubstitutionMap(), arguments, /*isNonThrowing*/ false);
// Get return elements.
SmallVector<SILValue, 4> results;
// Extract all direct results.
SmallVector<SILValue, 4> directResults;
extractAllElements(apply, builder, directResults);
// Three cursors: direct results of the call, indirect results of the call,
// and indirect results of the thunk itself.
auto fromDirResultsIter = directResults.begin();
auto fromIndResultsIter = apply->getIndirectSILResults().begin();
auto toIndResultsIter = thunk->getIndirectResults().begin();
// Reabstract results.
for (unsigned resIdx : range(toType->getNumResults())) {
auto fromRes = fromConv.getResults()[resIdx];
auto toRes = toConv.getResults()[resIdx];
// No abstraction mismatch.
if (fromRes.isFormalIndirect() == toRes.isFormalIndirect()) {
// If result types are direct, add call result as direct thunk result.
if (toRes.isFormalDirect())
// If result types are indirect, increment indirect result iterators.
else {
// Load direct results from indirect results.
if (fromRes.isFormalIndirect()) {
auto indRes = *fromIndResultsIter++;
auto *load = builder.createLoad(loc, indRes,
// Store direct results to indirect results.
SILType resultTy = toConv.getSILType(toRes);
auto indRes = *toIndResultsIter++;
builder.createStore(loc, *fromDirResultsIter++, indRes,
// Combine the collected direct results into the single return value.
auto retVal = joinElements(results, builder, loc);
// Deallocate local allocations.
for (auto *alloc : llvm::reverse(localAllocations))
builder.createDeallocStack(loc, alloc);
// Create return.
builder.createReturn(loc, retVal);
LLVM_DEBUG(auto &s = getADDebugStream() << "Created reabstraction thunk.\n";
s << " From type: " << fromType << '\n';
s << " To type: " << toType << '\n';
s << '\n' << *thunk);
return thunk;
namespace {
class VJPEmitter final
: public TypeSubstCloner<VJPEmitter, SILOptFunctionBuilder> {
friend class PullbackEmitter;
/// The global context.
ADContext &context;
/// The original function.
SILFunction *const original;
/// The `[differentiable]` attribute.
SILDifferentiableAttr *const attr;
/// The VJP function.
SILFunction *const vjp;
/// The pullback function.
SILFunction *pullback;
/// The differentiation invoker.
DifferentiationInvoker invoker;
/// Info from activity analysis on the original function.
const DifferentiableActivityInfo &activityInfo;
/// The linear map info.
LinearMapInfo pullbackInfo;
/// Caches basic blocks whose phi arguments have been remapped (adding a
/// predecessor enum argument).
SmallPtrSet<SILBasicBlock *, 4> remappedBasicBlocks;
/// A pair of a trampoline block phi argument and its corresponding
/// destination block phi argument.
struct TrampolinedArgumentPair {
SILPhiArgument *trampolineArgument;
SILPhiArgument *destinationArgument;
/// An array that keeps track of all `@guaranteed` phi arguments in any
/// trampoline blocks we've added. Each of these arguments needs to have a
/// lifetime-ending use past its destination argument's lifetime-ending use,
/// so we keep track of these pairs of arguments and emit `end_borrow`s when
/// function cloning is finished.
SmallVector<TrampolinedArgumentPair, 8> trampolinedGuaranteedPhiArguments;
/// Set to true once a non-differentiability error has been emitted while
/// visiting the original function. The visitors and `remapBasicBlock` check
/// this flag before doing further work.
bool errorOccurred = false;
/// Mapping from original blocks to pullback values. Used to build pullback
/// struct instances.
DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> pullbackValues;
/// Returns the AST context of the VJP function.
ASTContext &getASTContext() const {
  return vjp->getASTContext();
}
/// Returns the SIL module that contains the VJP function.
SILModule &getModule() const {
  return vjp->getModule();
}
/// Returns the differentiation indices recorded on the `[differentiable]`
/// attribute.
const SILAutoDiffIndices &getIndices() const {
  return attr->getIndices();
}
/// Compute the substitution map handed to the `TypeSubstCloner` base class.
///
/// Starts from the forwarding substitution map of `original`. When `vjp` has
/// a generic environment, the map is rebuilt over the VJP's generic signature
/// using the VJP's own forwarding substitutions.
static SubstitutionMap getSubstitutionMap(SILFunction *original,
SILFunction *vjp) {
auto substMap = original->getForwardingSubstitutionMap();
if (auto *vjpGenEnv = vjp->getGenericEnvironment()) {
auto vjpSubstMap = vjpGenEnv->getForwardingSubstitutionMap();
substMap = SubstitutionMap::get(
vjpGenEnv->getGenericSignature(), QuerySubstitutionMap{vjpSubstMap},
return substMap;
/// Fetch the activity information of `original` from the pass manager held by
/// `context`, dump it to the AD debug stream, and return a reference to it.
static const DifferentiableActivityInfo &getActivityInfo(
ADContext &context, SILFunction *original,
const SILAutoDiffIndices &indices, SILFunction *vjp) {
// Get activity info of the original function.
auto &passManager = context.getPassManager();
auto *activityAnalysis =
auto &activityCollection = *activityAnalysis->get(original);
auto &activityInfo = activityCollection.getActivityInfo(
dumpActivityInfo(*original, indices, activityInfo, getADDebugStream()));
return activityInfo;
/// Construct an emitter that clones `original` into `vjp`.
///
/// The base cloner is set up with the substitution map computed by
/// `getSubstitutionMap`. Activity information and the pullback linear map
/// info are computed in the initializer list; the `pullback` member is not in
/// that list and is assigned in the constructor body.
explicit VJPEmitter(ADContext &context, SILFunction *original,
SILDifferentiableAttr *attr, SILFunction *vjp,
DifferentiationInvoker invoker)
: TypeSubstCloner(*vjp, *original, getSubstitutionMap(original, vjp)),
context(context), original(original), attr(attr), vjp(vjp),
invoker(invoker), activityInfo(getActivityInfo(
context, original, attr->getIndices(), vjp)),
pullbackInfo(context, AutoDiffLinearMapKind::Pullback, original, vjp,
attr->getIndices(), activityInfo) {
// Create empty pullback function.
pullback = createEmptyPullback();
/// Create the pullback function declaration (without a body) for `original`
/// and return it.
///
/// The pullback's parameters are the tangent of the original result selected
/// by the attribute's source index followed by the pullback struct of the
/// original exit block; its results correspond to the requested "wrt"
/// parameters of the original function.
SILFunction *createEmptyPullback() {
auto &module = context.getModule();
auto origTy = original->getLoweredFunctionType();
auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());
// RAII that pushes the original function's generic signature to
// `module.Types` so that the calls to `module.Types.getTypeLowering()`
// below will know the original function's generic parameter types.
Lowering::GenericContextScope genericContextScope(
module.Types, origTy->getGenericSignature());
// Given a type, returns its formal SIL parameter info.
// The parameter convention is chosen from the convention of the original
// result that the tangent corresponds to.
auto getTangentParameterInfoForOriginalResult = [&](
CanType tanType, ResultConvention origResConv) -> SILParameterInfo {
auto &tl = context.getTypeConverter().getTypeLowering(
tanType, ResilienceExpansion::Minimal);
ParameterConvention conv;
switch (origResConv) {
case ResultConvention::Owned:
case ResultConvention::Autoreleased:
conv = tl.isTrivial()
? ParameterConvention::Direct_Unowned
: ParameterConvention::Direct_Guaranteed;
case ResultConvention::Unowned:
case ResultConvention::UnownedInnerPointer:
conv = ParameterConvention::Direct_Unowned;
case ResultConvention::Indirect:
conv = ParameterConvention::Indirect_In_Guaranteed;
return {tanType, conv};
// Given a type, returns its formal SIL result info.
// The result convention is chosen from the convention of the original
// parameter that the tangent corresponds to.
auto getTangentResultInfoForOriginalParameter = [&](
CanType tanType, ParameterConvention origParamConv) -> SILResultInfo {
auto &tl = context.getTypeConverter().getTypeLowering(
tanType, ResilienceExpansion::Minimal);
ResultConvention conv;
switch (origParamConv) {
case ParameterConvention::Direct_Owned:
case ParameterConvention::Direct_Guaranteed:
case ParameterConvention::Direct_Unowned:
conv = tl.isTrivial()
? ResultConvention::Unowned
: ResultConvention::Owned;
case ParameterConvention::Indirect_In:
case ParameterConvention::Indirect_Inout:
case ParameterConvention::Indirect_In_Constant:
case ParameterConvention::Indirect_In_Guaranteed:
case ParameterConvention::Indirect_InoutAliasable:
conv = ResultConvention::Indirect;
return {tanType, conv};
// Parameters of the pullback are:
// - the tangent vectors of the original results, and
// - a pullback struct.
// Results of the pullback are in the tangent space of the original
// parameters.
SmallVector<SILParameterInfo, 8> pbParams;
SmallVector<SILResultInfo, 8> adjResults;
auto origParams = origTy->getParameters();
auto indices = attr->getIndices();
// Add pullback parameter for the seed.
auto origResInfo = origTy->getResults()[indices.source];
->getCanonicalType(), origResInfo.getConvention()));
// Accept a pullback struct in the pullback parameter list. This is the
// returned pullback's closure context.
auto *origExit = &*original->findReturnBB();
auto *pbStruct = pullbackInfo.getLinearMapStruct(origExit);
auto pbStructType = pbStruct->getDeclaredInterfaceType()
pbParams.push_back({pbStructType, ParameterConvention::Direct_Owned});
// Add pullback results for the requested wrt parameters.
for (auto i : indices.parameters->getIndices()) {
auto origParam = origParams[i];
->getCanonicalType(), origParam.getConvention()));
Mangle::ASTMangler mangler;
auto pbName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffLinearMapKind::Pullback,
auto pbGenericSig = getDerivativeGenericSignature(attr, original);
auto *pbGenericEnv =
pbGenericSig ? pbGenericSig->getGenericEnvironment() : nullptr;
// The pullback reuses the original function's ext info, coroutine kind and
// callee convention; it has no yields and no error result.
auto pbType = SILFunctionType::get(
pbGenericSig, origTy->getExtInfo(), origTy->getCoroutineKind(),
origTy->getCalleeConvention(), pbParams, {}, adjResults, None,
SILOptFunctionBuilder fb(context.getTransform());
// The generated pullback linkage is set to Hidden because generated
// pullbacks are never called cross-module.
auto linkage = SILLinkage::Hidden;
// Note: this local shadows the `pullback` member; the member itself is
// assigned by the constructor from this function's return value.
auto *pullback = fb.createFunction(
linkage, pbName, pbType, pbGenericEnv, original->getLocation(),
original->isBare(), IsNotTransparent, original->isSerialized(),
pullback->setDebugScope(new (module)
return pullback;
/// Run VJP generation. Returns true on error.
bool run();
/// Cloner post-processing hook for a cloned instruction. Consults
/// `errorOccurred` and delegates to `SILClonerWithScopes::postProcess`.
void postProcess(SILInstruction *orig, SILInstruction *cloned) {
if (errorOccurred)
SILClonerWithScopes::postProcess(orig, cloned);
/// Remap original basic blocks, adding predecessor enum arguments.
///
/// Returns the VJP block that `bb` maps to in `BBMap`. Unless an error has
/// occurred or the block was already handled, an owned phi argument of the
/// block's branching trace enum type is created on the VJP block.
SILBasicBlock *remapBasicBlock(SILBasicBlock *bb) {
auto *vjpBB = BBMap[bb];
// If an error has occurred, or if the block has already been remapped,
// return the remapped block.
if (errorOccurred || remappedBasicBlocks.count(bb))
return vjpBB;
// Add predecessor enum argument to the remapped block.
auto *predEnum = pullbackInfo.getBranchingTraceDecl(bb);
auto enumTy = getOpASTType(predEnum->getDeclaredInterfaceType()
auto enumLoweredTy = context.getTypeConverter().getLoweredType(
enumTy, ResilienceExpansion::Minimal);
vjpBB->createPhiArgument(enumLoweredTy, ValueOwnershipKind::Owned);
return vjpBB;
/// General visitor for all instructions. If any error is emitted by previous
/// visits, bail out.
///
/// `errorOccurred` is checked first for every visited instruction.
void visit(SILInstruction *inst) {
if (errorOccurred)
/// Emits a non-differentiability error for `inst` on behalf of the current
/// invoker and records that an error occurred.
/// NOTE(review): presumably reached only for instructions that have no more
/// specific visitor; confirm against the visitor base class.
void visitSILInstruction(SILInstruction *inst) {
context.emitNondifferentiabilityError(inst, invoker,
errorOccurred = true;
/// Get the lowered SIL type of the given nominal type declaration.
///
/// The type is lowered through the context's type converter with minimal
/// resilience expansion.
SILType getNominalDeclLoweredType(NominalTypeDecl *nominal) {
auto nomType = getOpASTType(
auto nomSILType = context.getTypeConverter().getLoweredType(
nomType, ResilienceExpansion::Minimal);
return nomSILType;
/// Build a pullback struct value for the original block corresponding to the
/// given terminator.
///
/// The struct fields are the pullback values recorded for the block. For
/// every block other than the entry block, the block's predecessor enum
/// argument (the last argument of the VJP block) is placed in front of them.
StructInst *buildPullbackValueStructValue(TermInst *termInst) {
assert(termInst->getFunction() == original);
auto loc = termInst->getFunction()->getLocation();
auto *origBB = termInst->getParent();
auto *vjpBB = BBMap[origBB];
auto *pbStruct = pullbackInfo.getLinearMapStruct(origBB);
auto structLoweredTy = getNominalDeclLoweredType(pbStruct);
// Work on a copy so the insertion below does not modify the list stored in
// `pullbackValues`.
auto bbPullbackValues = pullbackValues[origBB];
if (!origBB->isEntry()) {
auto *predEnumArg = vjpBB->getArguments().back();
bbPullbackValues.insert(bbPullbackValues.begin(), predEnumArg);
return getBuilder().createStruct(loc, structLoweredTy, bbPullbackValues);
/// Build a predecessor enum instance using the given builder for the given
/// original predecessor/successor blocks and pullback struct value.
///
/// The enum type is the branching trace enum of `succBB`; the case is the one
/// registered for the `predBB` -> `succBB` edge. If the case payload is a box
/// type, the struct value is stored into a freshly allocated box first.
EnumInst *buildPredecessorEnumValue(SILBuilder &builder,
SILBasicBlock *predBB,
SILBasicBlock *succBB,
SILValue pbStructVal) {
auto loc = pbStructVal.getLoc();
auto *succEnum = pullbackInfo.getBranchingTraceDecl(succBB);
auto enumLoweredTy = getNominalDeclLoweredType(succEnum);
auto *enumEltDecl =
pullbackInfo.lookUpBranchingTraceEnumElement(predBB, succBB);
auto enumEltType = getOpType(
enumLoweredTy.getEnumElementType(enumEltDecl, getModule()));
// If the enum element type does not have a box type (i.e. the enum case is
// not indirect), then directly create an enum.
auto boxType = dyn_cast<SILBoxType>(enumEltType.getASTType());
if (!boxType)
return builder.createEnum(loc, pbStructVal, enumEltDecl, enumLoweredTy);
// Otherwise, box the pullback struct value and create an enum.
auto *newBox = builder.createAllocBox(loc, boxType);
// NOTE(review): the lambda parameter `borrowedBox` is not used; the
// projection below is taken from `newBox`. Confirm whether `borrowedBox`
// was intended.
loc, newBox, [&](SILValue borrowedBox) {
auto *projectBox = builder.createProjectBox(loc, newBox, /*index*/ 0);
builder.emitStoreValueOperation(loc, pbStructVal, projectBox,
return builder.createEnum(loc, newBox, enumEltDecl, enumLoweredTy);
/// Visit a `return` of the original function.
///
/// Builds the pullback struct for the exit block, partially applies the
/// pullback function to it, and joins the remapped original results into the
/// value returned from the VJP.
void visitReturnInst(ReturnInst *ri) {
auto loc = ri->getOperand().getLoc();
auto *origExit = ri->getParent();
auto &builder = getBuilder();
auto *pbStructVal = buildPullbackValueStructValue(ri);
// Get the value in the VJP corresponding to the original result.
auto *origRetInst = cast<ReturnInst>(origExit->getTerminator());
auto origResult = getOpValue(origRetInst->getOperand());
SmallVector<SILValue, 8> origResults;
extractAllElements(origResult, builder, origResults);
// Get and partially apply the pullback.
// Prefer the forwarding substitutions of the VJP's generic environment when
// there is one.
auto vjpGenericEnv = vjp->getGenericEnvironment();
auto vjpSubstMap = vjpGenericEnv
? vjpGenericEnv->getForwardingSubstitutionMap()
: vjp->getForwardingSubstitutionMap();
auto *pullbackRef = builder.createFunctionRef(loc, pullback);
// The pullback struct is the only captured argument.
auto *pullbackPartialApply = builder.createPartialApply(
loc, pullbackRef, vjpSubstMap, {pbStructVal},
// Return a tuple of the original result and pullback.
SmallVector<SILValue, 8> directResults;
directResults.append(origResults.begin(), origResults.end());
ri->getLoc(), joinElements(directResults, builder, loc));
/// Visit a `br` of the original function.
///
/// Builds the pullback struct for the current block and the predecessor enum
/// value for the destination block before the new branch to the remapped
/// destination block is formed.
void visitBranchInst(BranchInst *bi) {
// Build pullback struct value for original block.
// Build predecessor enum value for destination block.
auto *origBB = bi->getParent();
auto *pbStructVal = buildPullbackValueStructValue(bi);
auto *enumVal = buildPredecessorEnumValue(
getBuilder(), origBB, bi->getDestBB(), pbStructVal);
// Remap arguments, appending the new enum values.
SmallVector<SILValue, 8> args;
for (auto origArg : bi->getArgs())
// Create a new `br` instruction.
bi->getLoc(), getOpBasicBlock(bi->getDestBB()), args);
/// Visit a `cond_br` of the original function.
///
/// Builds the pullback struct for the current block once; each trampoline
/// block created by the helper below wraps that struct in the predecessor
/// enum of its successor and branches on to the VJP successor block.
void visitCondBranchInst(CondBranchInst *cbi) {
// Build pullback struct value for original block.
// Build predecessor enum values for true/false blocks.
auto *origBB = cbi->getParent();
auto *pbStructVal = buildPullbackValueStructValue(cbi);
// Creates a trampoline block for given original successor block. The
// trampoline block has the same arguments as the VJP successor block but
// drops the last predecessor enum argument. The generated `cond_br`
// instruction branches to the trampoline block, and the trampoline block
// constructs a predecessor enum value and branches to the VJP successor
// block.
auto createTrampolineBasicBlock =
[&](SILBasicBlock *origSuccBB) -> SILBasicBlock * {
auto *vjpSuccBB = getOpBasicBlock(origSuccBB);
// Create the trampoline block.
// The trampoline is placed immediately before the VJP successor block.
auto *trampolineBB = vjp->createBasicBlockBefore(vjpSuccBB);
for (auto *arg : vjpSuccBB->getArguments().drop_back())
// Build predecessor enum value for successor block and branch to it.
SILBuilder trampolineBuilder(trampolineBB);
auto *succEnumVal = buildPredecessorEnumValue(
trampolineBuilder, origBB, origSuccBB, pbStructVal);
SmallVector<SILValue, 4> forwardedArguments(
trampolineBuilder.createBranch(cbi->getLoc(), vjpSuccBB,
return trampolineBB;
// Create a new `cond_br` instruction.
cbi->getLoc(), getOpValue(cbi->getCondition()),
/// Visit a `switch_enum` of the original function.
///
/// Every case destination (and the default destination, when present) is
/// replaced by a trampoline block that builds the predecessor enum value for
/// that successor and then branches to the VJP successor block.
void visitSwitchEnumInst(SwitchEnumInst *sei) {
// Build pullback struct value for original block.
auto *origBB = sei->getParent();
auto *pbStructVal = buildPullbackValueStructValue(sei);
// Creates a trampoline block for given original successor block. The
// trampoline block has the same arguments as the VJP successor block but
// drops the last predecessor enum argument. The generated `switch_enum`
// instruction branches to the trampoline block, and the trampoline block
// constructs a predecessor enum value and branches to the VJP successor
// block.
auto createTrampolineBasicBlock =
[&](SILBasicBlock *origSuccBB) -> SILBasicBlock * {
auto *vjpSuccBB = getOpBasicBlock(origSuccBB);
// Create the trampoline block.
auto *trampolineBB = vjp->createBasicBlockBefore(vjpSuccBB);
// Mirror every destination argument except the trailing predecessor enum.
for (auto *destArg : vjpSuccBB->getArguments().drop_back()) {
auto *trampolineArg = trampolineBB->createPhiArgument(
destArg->getType(), destArg->getOwnershipKind());
// Each `@guaranteed` trampoline argument needs to have a
// lifetime-ending use past its destination argument's lifetime-ending
// uses, so we keep track of these pairs of arguments in
// `trampolinedGuaranteedPhiArguments` and emit `end_borrow`s when
// function cloning is finished.
if (trampolineArg->getOwnershipKind() == ValueOwnershipKind::Guaranteed)
{trampolineArg, cast<SILPhiArgument>(destArg)});
// Build predecessor enum value for successor block and branch to it.
SILBuilder trampolineBuilder(trampolineBB);
auto *succEnumVal = buildPredecessorEnumValue(
trampolineBuilder, origBB, origSuccBB, pbStructVal);
SmallVector<SILValue, 4> forwardedArguments(
trampolineBuilder.createBranch(sei->getLoc(), vjpSuccBB,
return trampolineBB;
// Create trampoline successor basic blocks.
SmallVector<std::pair<EnumElementDecl *, SILBasicBlock *>, 4> caseBBs;
for (unsigned i : range(sei->getNumCases())) {
auto caseBB = sei->getCase(i);
auto *trampolineBB = createTrampolineBasicBlock(caseBB.second);
caseBBs.push_back({caseBB.first, trampolineBB});
// Create trampoline default basic block.
// `newDefaultBB` stays null when the original instruction has no default
// destination.
SILBasicBlock *newDefaultBB = nullptr;
if (auto *defaultBB = sei->getDefaultBBOrNull().getPtrOrNull())
newDefaultBB = createTrampolineBasicBlock(defaultBB);
// Create a new `switch_enum` instruction.
sei->getLoc(), getOpValue(sei->getOperand()), newDefaultBB, caseBBs);
// If an `apply` has active results or active inout parameters, replace it
// with an `apply` of its VJP.
void visitApplyInst(ApplyInst *ai) {
// If the function should not be differentiated or its the array literal
// initialization intrinsic, just do standard cloning.
if (!pullbackInfo.shouldDifferentiateApplyInst(ai) ||
isArrayLiteralIntrinsic(ai)) {
LLVM_DEBUG(getADDebugStream() << "No active results:\n" << *ai << '\n');
// Check and reject functions with active inout arguments. It's not yet
// supported.
auto paramInfos = ai->getSubstCalleeConv().getParameters();
auto paramArgs = ai->getArgumentsWithoutIndirectResults();
for (unsigned i : swift::indices(paramInfos)) {
if (paramInfos[i].isIndirectInOut() &&
activityInfo.isActive(paramArgs[i], getIndices())) {
context.emitNondifferentiabilityError(ai, invoker,
errorOccurred = true;
LLVM_DEBUG(getADDebugStream() << "VJP-transforming:\n" << *ai << '\n');
// Get the minimal parameter and result indices required for differentiating
// this `apply`.
SmallVector<SILValue, 4> allResults;
SmallVector<unsigned, 8> activeParamIndices;
SmallVector<unsigned, 8> activeResultIndices;
collectMinimalIndicesForFunctionCall(ai, getIndices(), activityInfo,
allResults, activeParamIndices,
assert(!activeParamIndices.empty() && "Parameter indices cannot be empty");
assert(!activeResultIndices.empty() && "Result indices cannot be empty");
LLVM_DEBUG(auto &s = getADDebugStream() << "Active indices: params={";
interleave(activeParamIndices.begin(), activeParamIndices.end(),
[&s](unsigned i) { s << i; }, [&s] { s << ", "; });
s << "}, results={"; interleave(
activeResultIndices.begin(), activeResultIndices.end(),
[&s](unsigned i) { s << i; }, [&s] { s << ", "; });
s << "}\n";);
// FIXME: We don't support multiple active results yet.
if (activeResultIndices.size() > 1) {
ai, invoker, diag::autodiff_expression_not_differentiable_note);
errorOccurred = true;
// Form expected indices, assuming there's only one result.
SILAutoDiffIndices indices(
getASTContext(), ai->getArgumentsWithoutIndirectResults().size(),
// Emit the VJP.
auto loc = ai->getLoc();
auto &builder = getBuilder();
auto original = getOpValue(ai->getCallee());
SILValue vjpValue;
// If functionSource is a `@differentiable` function, just extract it.
auto originalFnTy = original->getType().castTo<SILFunctionType>();
if (originalFnTy->isDifferentiable()) {
auto paramIndices = originalFnTy->getDifferentiationParameterIndices();
for (auto i : indices.parameters->getIndices()) {
if (!paramIndices->contains(i)) {
context.emitNondifferentiabilityError(original, invoker,
errorOccurred = true;
auto borrowedDiffFunc = builder.emitBeginBorrowOperation(loc, original);
vjpValue = builder.createDifferentiableFunctionExtract(
loc, NormalDifferentiableFunctionTypeComponent::VJP,
vjpValue = builder.emitCopyValueOperation(loc, vjpValue);
// Check and diagnose non-differentiable original function type.
auto diagnoseNondifferentiableOriginalFunctionType =
[&](CanSILFunctionType origFnTy) {
// Check and diagnose non-differentiable arguments.
for (unsigned paramIndex : range(originalFnTy->getNumParameters())) {
if (indices.isWrtParameter(paramIndex) &&
.isDifferentiable(getModule())) {
ai->getArgumentsWithoutIndirectResults()[paramIndex], invoker,
errorOccurred = true;
return true;
// Check and diagnose non-differentiable results.
if (!originalFnTy->getResults()[indices.source]
.isDifferentiable(getModule())) {
original, invoker, diag::autodiff_nondifferentiable_result);
errorOccurred = true;
return true;
return false;
if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
// If VJP has not yet been found, emit a `differentiable_function`
// instruction on the remapped original function operand and
// a `differentiable_function_extract` instruction to get the VJP.
// The `differentiable_function` instruction will be canonicalized during
// the transform main loop.
if (!vjpValue) {
// FIXME: Handle indirect differentiation invokers. This may require some
// redesign: currently, each original function + attribute pair is mapped
// only to one invoker.
DifferentiationInvoker indirect(ai, attr);
auto insertion =
context.getInvokers().try_emplace({this->original, attr}, indirect);
auto &invoker = insertion.first->getSecond();
invoker = indirect;
// If the original `apply` instruction has a substitution map, then the
// applied function is specialized.
// In the VJP, specialization is also necessary for parity. The original
// function operand is specialized with a remapped version of same
// substitution map using an argument-less `partial_apply`.
if (ai->getSubstitutionMap().empty()) {
original = builder.emitCopyValueOperation(loc, original);
} else {
auto substMap = getOpSubstitutionMap(ai->getSubstitutionMap());
auto vjpPartialApply = getBuilder().createPartialApply(
ai->getLoc(), original, substMap, {},
original = vjpPartialApply;
originalFnTy = original->getType().castTo<SILFunctionType>();
// Diagnose if new original function type is non-differentiable.
if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
auto *diffFuncInst = context.createDifferentiableFunction(
getBuilder(), loc, indices.parameters, original);
// Record the `differentiable_function` instruction.
// TODO(TF-689): Make `differentiable_function` store result indices and
// remove `ADContext::resultIndices`.
context.getResultIndices()[diffFuncInst] = activeResultIndices.front();
auto borrowedADFunc =
builder.emitBeginBorrowOperation(loc, diffFuncInst);
auto extractedVJP = getBuilder().createDifferentiableFunctionExtract(
loc, NormalDifferentiableFunctionTypeComponent::VJP,
vjpValue = builder.emitCopyValueOperation(loc, extractedVJP);
builder.emitEndBorrowOperation(loc, borrowedADFunc);
builder.emitDestroyValueOperation(loc, diffFuncInst);
// Record desired/actual VJP indices.
// Temporarily set original pullback type to `None`.
NestedApplyInfo info{indices, /*originalPullbackType*/ None};
auto insertion = context.getNestedApplyInfo().try_emplace(ai, info);
auto &nestedApplyInfo = insertion.first->getSecond();
nestedApplyInfo = info;
// Call the VJP using the original parameters.
SmallVector<SILValue, 8> vjpArgs;
auto vjpFnTy = getOpType(vjpValue->getType()).castTo<SILFunctionType>();
auto numVJPArgs =
vjpFnTy->getNumParameters() + vjpFnTy->getNumIndirectFormalResults();
// Collect substituted arguments.
for (auto origArg : ai->getArguments())
assert(vjpArgs.size() == numVJPArgs);
// Apply the VJP.
// The VJP should be specialized, so no substitution map is necessary.
auto *vjpCall = getBuilder().createApply(loc, vjpValue, SubstitutionMap(),
vjpArgs, ai->isNonThrowing());
LLVM_DEBUG(getADDebugStream() << "Applied vjp function\n" << *vjpCall);
builder.emitDestroyValueOperation(loc, vjpValue);
// Get the VJP results (original results and pullback).
SmallVector<SILValue, 8> vjpDirectResults;
extractAllElements(vjpCall, getBuilder(), vjpDirectResults);
ArrayRef<SILValue> originalDirectResults =
SILValue originalDirectResult = joinElements(originalDirectResults,
SILValue pullback = vjpDirectResults.back();
// Store the original result to the value map.
mapValue(ai, originalDirectResult);
// Checkpoint the pullback.
auto *pullbackDecl = pullbackInfo.lookUpLinearMapDecl(ai);
// If actual pullback type does not match lowered pullback type, reabstract
// the pullback using a thunk.
auto actualPullbackType =
auto vjpGenSig = SubsMap.getGenericSignature()
? SubsMap.getGenericSignature()->getCanonicalSignature()
: nullptr;
Lowering::GenericContextScope genericContextScope(
context.getTypeConverter(), vjpGenSig);
auto loweredPullbackType =
if (!loweredPullbackType->isEqual(actualPullbackType)) {
// Set non-reabstracted original pullback type in nested apply info.
nestedApplyInfo.originalPullbackType = actualPullbackType;
SILOptFunctionBuilder fb(context.getTransform());
auto *thunk = getOrCreateReabstractionThunk(
fb, getModule(), loc, /*caller*/ vjp, actualPullbackType,
auto *thunkRef = getBuilder().createFunctionRef(loc, thunk);
pullback = getBuilder().createPartialApply(
ai->getLoc(), thunkRef,
{pullback}, actualPullbackType->getCalleeConvention());
// Some instructions that produce the callee may have been cloned.
// If the original callee did not have any users beyond this `apply`,
// recursively kill the cloned callee.
if (auto *origCallee = cast_or_null<SingleValueInstruction>(
if (origCallee->hasOneUse())
/// Visit a `differentiable_function` instruction of the original function.
/// \param dfi the original `differentiable_function` instruction.
void visitDifferentiableFunctionInst(DifferentiableFunctionInst *dfi) {
// Clone `differentiable_function` from original to VJP, then add the cloned
// instruction to the `differentiable_function` worklist.
// `getOpValue(dfi)` yields the value the original instruction maps to; it is
// expected to be a `DifferentiableFunctionInst` (`cast` asserts otherwise).
auto *newDFI = cast<DifferentiableFunctionInst>(getOpValue(dfi));
} // end anonymous namespace
// AdjointValue - a symbolic representation for adjoint values that allows
// for efficient differentiation of aggregates.
namespace {
class PullbackEmitter;
class AdjointValue;
/// Discriminator for the representation stored in an `AdjointValueBase`.
/// The cases referenced elsewhere in this file are `Zero`, `Aggregate` and
/// `Concrete`.
enum AdjointValueKind {
/// An empty adjoint, i.e. zero. This case exists due to its special
/// mathematical properties: `0 + x = x`. This is a guaranteed optimization
/// when we combine a zero adjoint with another (e.g. differentiating a
/// fanout).
/// An aggregate of adjoint values.
/// A concrete SIL value.
/// Storage behind an `AdjointValue`: a kind tag, the SIL type, and a union
/// holding the payload for the aggregate and concrete kinds.
class AdjointValueBase {
friend class AdjointValue;
/// The kind of this adjoint value.
AdjointValueKind kind;
/// The type of this value as if it were materialized as a SIL value.
SILType type;
/// The underlying value.
// Which member is meaningful is determined by `kind`. The default
// constructor initializes no member; it is the one used by the zero-kind
// constructor below.
union Value {
ArrayRef<AdjointValue> aggregate;
SILValue concrete;
Value(ArrayRef<AdjointValue> v) : aggregate(v) {}
Value(SILValue v) : concrete(v) {}
Value() {}
} value;
/// Creates an aggregate adjoint value of the given type from its elements.
/// Only the `ArrayRef` is stored, so the element storage must outlive this
/// object.
explicit AdjointValueBase(SILType type,
ArrayRef<AdjointValue> aggregate)
: kind(AdjointValueKind::Aggregate), type(type), value(aggregate) {}
/// Creates a concrete adjoint value; its type is taken from `v`.
explicit AdjointValueBase(SILValue v)
: kind(AdjointValueKind::Concrete), type(v->getType()), value(v) {}
/// Creates a zero adjoint value of the given type; no payload is stored.
explicit AdjointValueBase(SILType type)
: kind(AdjointValueKind::Zero), type(type) {}
/// A symbolic adjoint value that is capable of representing zero value 0 and
/// 1, in addition to a materialized SILValue. This is expected to be passed
/// around by value in most cases, as it's two words long.
// NOTE(review): the only data member visible here is a single pointer
// (`base`); confirm whether the "two words" remark above is still accurate.
class AdjointValue final {
friend class PullbackEmitter;
/// Pointer to the storage (kind, type and payload) of this adjoint value.
/// May be null for a default-constructed `AdjointValue`.
AdjointValueBase *base;
/// Wraps an existing base; intentionally implicit so that the factory
/// methods below can return the freshly allocated `AdjointValueBase *`.
/*implicit*/ AdjointValue(AdjointValueBase *base = nullptr) : base(base) {}
/// Direct access to the underlying base. Neither operator checks for null.
AdjointValueBase *operator->() const { return base; }
AdjointValueBase &operator*() const { return *base; }
/// Allocates a concrete adjoint value wrapping `value` in `allocator`.
static AdjointValue createConcrete(llvm::BumpPtrAllocator &allocator,
SILValue value) {
return new (allocator.Allocate<AdjointValueBase>()) AdjointValueBase(value);
/// Allocates an aggregate adjoint value of type `type`. The elements are
/// copied into storage obtained from `allocator`, so the caller's range does
/// not need to outlive the result.
template<typename EltRange>
static AdjointValue createAggregate(llvm::BumpPtrAllocator &allocator,
SILType type, EltRange elements) {
// Raw, suitably aligned storage for `elements.size()` adjoint values.
AdjointValue *buf = reinterpret_cast<AdjointValue *>(allocator.Allocate(
elements.size() * sizeof(AdjointValue), alignof(AdjointValue)));
MutableArrayRef<AdjointValue> elementsCopy(buf, elements.size());
std::uninitialized_copy(elements.begin(), elements.end(),
return new (allocator.Allocate<AdjointValueBase>())
AdjointValueBase(type, elementsCopy);
/// Allocates a zero adjoint value of type `type` in `allocator`.
static AdjointValue createZero(llvm::BumpPtrAllocator &allocator,
SILType type) {
return new (allocator.Allocate<AdjointValueBase>()) AdjointValueBase(type);
/// Returns the kind tag stored in the base.
AdjointValueKind getKind() const { return base->kind; }
/// Returns the SIL type stored in the base.
SILType getType() const { return base->type; }
/// Returns the AST type underlying the SIL type.
CanType getSwiftType() const { return getType().getASTType(); }
/// Returns the nominal type declaration of the AST type, as reported by
/// `getAnyNominal()` on that type.
NominalTypeDecl *getAnyNominal() const {
return getSwiftType()->getAnyNominal();
/// Kind predicates.
bool isZero() const { return getKind() == AdjointValueKind::Zero; }
bool isAggregate() const { return getKind() == AdjointValueKind::Aggregate; }
bool isConcrete() const { return getKind() == AdjointValueKind::Concrete; }
// The accessors below read the union's `aggregate` member without checking
// the kind, so they are only valid for aggregate values.
unsigned getNumAggregateElements() const {
return base->value.aggregate.size();
AdjointValue getAggregateElement(unsigned i) const {
return base->value.aggregate[i];
ArrayRef<AdjointValue> getAggregateElements() const {
return base->value.aggregate;
/// Reads the union's `concrete` member without checking the kind, so it is
/// only valid for concrete values.
SILValue getConcreteValue() const {
return base->value.concrete;
/// Prints a textual description of this adjoint value to `s`. The text
/// depends on the kind; aggregates are printed as `Struct` or `Tuple`
/// according to the value's type, and any other aggregate type is treated as
/// unreachable.
void print(llvm::raw_ostream &s) const {
switch (getKind()) {
case AdjointValueKind::Zero:
s << "Zero";
case AdjointValueKind::Aggregate:
s << "Aggregate<";
if (auto *decl =
getType().getASTType()->getStructOrBoundGenericStruct()) {
s << "Struct>(";
[&s](std::tuple<VarDecl *,
const AdjointValue &> elt) {
s << std::get<0>(elt)->getName() << ": ";
}, [&s] { s << ", "; });
} else if (auto tupleType = getType().getAs<TupleType>()) {
s << "Tuple>(";
[&s](const AdjointValue &elt) { elt.print(s); },
[&s] { s << ", "; });
} else {
llvm_unreachable("Invalid aggregate");
s << ')';
case AdjointValueKind::Concrete:
s << "Concrete(" << base->value.concrete << ')';
/// Stream insertion for `AdjointValue`; returns `os` so that insertions can be
/// chained.
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
const AdjointValue &adjVal) {
return os;
} // end anonymous namespace
namespace {
/// A `TypeSubstCloner` over the original function that, per the fields below,
/// targets a JVP function and additionally keeps a separate builder and
/// tangent value/buffer mappings for an associated differential function.
class JVPEmitter final
: public TypeSubstCloner<JVPEmitter, SILOptFunctionBuilder> {
/// The global context.
ADContext &context;
/// The original function.
SILFunction *const original;
/// The `[differentiable]` attribute.
SILDifferentiableAttr *const attr;
/// The JVP function.
SILFunction *const jvp;
/// Allocator passed to the `AdjointValue` factory methods when creating
/// tangent values.
llvm::BumpPtrAllocator allocator;
/// The differentiation invoker.
DifferentiationInvoker invoker;
/// Info from activity analysis on the original function.
const DifferentiableActivityInfo &activityInfo;
/// The differential info.
LinearMapInfo differentialInfo;
/// Set to true by instruction handlers after they emit a
/// non-differentiability error.
bool errorOccurred = false;
// Differential generation related fields
/// The builder for the differential function.
SILBuilder differentialBuilder;
/// Mapping from original basic blocks to corresponding differential basic
/// blocks.
DenseMap<SILBasicBlock *, SILBasicBlock *> diffBBMap;
/// Mapping from original values to corresponding tangent values.
DenseMap<SILValue, AdjointValue> tangentValueMap;
/// Mapping from original basic blocks and original buffers to corresponding
/// tangent buffers.
DenseMap<std::pair<SILBasicBlock *, SILValue>, SILValue> bufferMap;
/// Mapping from differential basic blocks to differential struct arguments.
DenseMap<SILBasicBlock *, SILArgument *> differentialStructArguments;
/// Mapping from differential struct field declarations to differential struct
/// elements destructured from the linear map basic block argument. In the
/// beginning of each differential basic block, the block's differential
/// struct is destructured into the individual elements stored here.
DenseMap<VarDecl *, SILValue> differentialStructElements;
/// An auxiliary differential local allocation builder.
SILBuilder diffLocalAllocBuilder;
/// Stack buffers allocated for storing local tangent values.
SmallVector<SILValue, 8> differentialLocalAllocations;
/// Mapping from original blocks to differential values. Used to build
/// differential struct instances.
DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> differentialValues;
// Getters
/// Returns the AST context of the JVP function.
ASTContext &getASTContext() const { return jvp->getASTContext(); }
/// Returns the SIL module of the JVP function.
SILModule &getModule() const { return jvp->getModule(); }
/// Returns the differentiation indices stored on the `[differentiable]`
/// attribute.
const SILAutoDiffIndices &getIndices() const { return attr->getIndices(); }
/// Returns the builder used to emit instructions into the differential.
SILBuilder &getDifferentialBuilder() { return differentialBuilder; }
/// Returns the function that the differential builder emits into.
SILFunction &getDifferential() {
return differentialBuilder.getFunction();
/// Returns the differential struct argument recorded under `origBB` in
/// `differentialStructArguments`. In asserts builds, additionally checks that
/// the argument's struct matches the linear map struct of `origBB`.
// NOTE(review): `DenseMap::operator[]` default-inserts a null entry when the
// key is absent — confirm every caller passes a block that was registered.
SILArgument *getDifferentialStructArgument(SILBasicBlock *origBB) {
#ifndef NDEBUG
auto *diffStruct = differentialStructArguments[origBB]->getType()
assert(diffStruct == differentialInfo.getLinearMapStruct(origBB));
return differentialStructArguments[origBB];
// Initialization helpers
/// Computes a substitution map for `original` relative to `jvp`.
/// Starts from the original function's forwarding substitution map. When the
/// JVP has a generic environment, the map is instead rebuilt over the JVP's
/// generic signature, with types resolved through the JVP's forwarding
/// substitution map.
/// \returns the resulting substitution map.
static SubstitutionMap getSubstitutionMap(SILFunction *original,
SILFunction *jvp) {
auto substMap = original->getForwardingSubstitutionMap();
if (auto *jvpGenEnv = jvp->getGenericEnvironment()) {
auto jvpSubstMap = jvpGenEnv->getForwardingSubstitutionMap();
substMap = SubstitutionMap::get(
jvpGenEnv->getGenericSignature(), QuerySubstitutionMap{jvpSubstMap},
return substMap;
/// Returns the activity info about the SILValues in the original function.
/// The info is taken from the activity collection computed for `original`;
/// it is also passed to `dumpActivityInfo` together with the AD debug stream.
/// \param context the global context, used to reach the pass manager.
/// \param original the function whose activity is queried.
/// \param indices the differentiation indices passed to `dumpActivityInfo`.
/// \param jvp the JVP function (its use is not visible in this excerpt).
static const DifferentiableActivityInfo &getActivityInfo(
ADContext &context, SILFunction *original,
const SILAutoDiffIndices &indices, SILFunction *jvp) {
// Get activity info of the original function.
auto &passManager = context.getPassManager();
auto *activityAnalysis =
auto &activityCollection = *activityAnalysis->get(original);
auto &activityInfo = activityCollection.getActivityInfo(
dumpActivityInfo(*original, indices, activityInfo, getADDebugStream()));
return activityInfo;
// Differential struct mapping
/// Records, in `differentialStructElements`, the value corresponding to each
/// stored property of the differential struct of `origBB`.
/// \param origBB the original basic block whose linear map struct is used.
/// \param values one value per stored property, in declaration order.
/// Asserts that the counts match, that no value has guaranteed ownership, and
/// that no field has been mapped before.
void initializeDifferentialStructElements(SILBasicBlock *origBB,
SILInstructionResultArray values) {
auto *diffStructDecl = differentialInfo.getLinearMapStruct(origBB);
assert(diffStructDecl->getStoredProperties().size() == values.size() &&
"The number of differential struct fields must equal the number of "
"differential struct element values");
// Pair each stored property with the value at the same position.
for (auto pair : llvm::zip(diffStructDecl->getStoredProperties(), values)) {
std::get<1>(pair).getOwnershipKind() != ValueOwnershipKind::Guaranteed
&& "Differential struct elements must be @owned");
auto insertion = differentialStructElements.insert({std::get<0>(pair),
assert(insertion.second &&
"A differential struct element mapping already exists!");
/// Returns the value previously recorded for the differential struct field
/// `field`. Asserts that a mapping for `field` exists.
/// \param origBB the original basic block, used only in the assertion on the
///        linear map struct.
/// \param field the differential struct field to look up.
SILValue getDifferentialStructElement(SILBasicBlock *origBB, VarDecl *field) {
assert(differentialInfo.getLinearMapStruct(origBB) ==
assert(differentialStructElements.count(field) &&
"Differential struct element for this field does not exist!");
return differentialStructElements.lookup(field);
// General utilities
/// Returns the position in the differential at which the next local
/// allocation should be inserted: the start of the differential's entry block
/// when no local allocation exists yet, otherwise the position of the most
/// recently recorded local allocation.
SILBasicBlock::iterator getNextDifferentialLocalAllocationInsertionPoint() {
// If there are no local allocations, insert at the beginning of the tangent
// entry.
if (differentialLocalAllocations.empty())
return getDifferential().getEntryBlock()->begin();
// Otherwise, insert before the last local allocation. Inserting before
// rather than after ensures that allocation and zero initialization
// instructions are grouped together.
auto lastLocalAlloc = differentialLocalAllocations.back();
auto it = lastLocalAlloc->getDefiningInstruction()->getIterator();
return it;
/// Get the lowered SIL type of the given nominal type declaration.
/// Lowering goes through the context's type converter using minimal
/// resilience expansion.
/// \param nominal the nominal type declaration to lower.
/// \returns the lowered SIL type.
SILType getNominalDeclLoweredType(NominalTypeDecl *nominal) {
auto nomType =
auto nomSILType = context.getTypeConverter().getLoweredType(
nomType, ResilienceExpansion::Minimal);
return nomSILType;
/// Build a differential struct value for the original block corresponding to
/// the given terminator.
/// \param termInst a terminator in the original function (asserted).
/// \returns the `struct` instruction created with the cloner's builder.
StructInst *buildDifferentialValueStructValue(TermInst *termInst) {
assert(termInst->getFunction() == original);
// The struct is emitted at the function's location rather than the
// terminator's own location.
auto loc = termInst->getFunction()->getLocation();
auto *origBB = termInst->getParent();
auto *jvpBB = BBMap[origBB];
assert(jvpBB && "Basic block mapping should exist");
auto *diffStruct = differentialInfo.getLinearMapStruct(origBB);
assert(diffStruct && "The differential struct should have been declared");
auto structLoweredTy = getNominalDeclLoweredType(diffStruct);
// Deliberate copy: prepending below must not modify the vector stored in
// `differentialValues`.
auto bbDifferentialValues = differentialValues[origBB];
// For non-entry blocks, the last argument of the JVP block becomes the
// first struct element.
if (!origBB->isEntry()) {
auto *enumArg = jvpBB->getArguments().back();
bbDifferentialValues.insert(bbDifferentialValues.begin(), enumArg);
return getBuilder().createStruct(loc, structLoweredTy,
// Tangent value factory methods
/// Creates a symbolic zero tangent value. `type` is first remapped into the
/// differential function's context.
AdjointValue makeZeroTangentValue(SILType type) {
return AdjointValue::createZero(
allocator, remapSILTypeInDifferential(type));
/// Creates a tangent value wrapping the concrete SIL value `value`.
AdjointValue makeConcreteTangentValue(SILValue value) {
return AdjointValue::createConcrete(allocator, value);
// Tangent materialization
/// Emits code in the differential that zero-initializes the buffer
/// `bufferAccess` for a value of type `type`, dispatching on the kind of the
/// type's tangent space. Asserts that `type` has a tangent space.
/// \param type the type whose tangent space determines how to emit zero.
/// \param bufferAccess the address to initialize.
/// \param loc the location for emitted instructions.
void emitZeroIndirect(CanType type, SILValue bufferAccess,
SILLocation loc) {
// NOTE(review): this copies the differential builder by value, whereas most
// sibling handlers bind it with `auto &` — confirm the copy is intentional.
auto builder = getDifferentialBuilder();
auto tangentSpace = getTangentSpace(type);
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector:
emitZeroIntoBuffer(builder, type, bufferAccess, loc);
case VectorSpace::Kind::Tuple: {
// Tuples are handled element by element through element addresses.
auto tupleType = tangentSpace->getTuple();
SmallVector<SILValue, 8> zeroElements;
for (unsigned i : range(tupleType->getNumElements())) {
auto eltAddr = builder.createTupleElementAddr(loc, bufferAccess, i);
eltAddr, loc);
case VectorSpace::Kind::Function: {
"Unimplemented: Emit thunks for abstracting zero initialization");
/// Emits code in the differential that produces a zero value of `type` as a
/// loaded value: allocates a stack buffer, zero-initializes it with
/// `emitZeroIndirect`, loads from it with `take`, and deallocates the buffer.
/// \returns the loaded zero value.
SILValue emitZeroDirect(CanType type, SILLocation loc) {
// NOTE(review): builder is copied by value here — confirm intentional.
auto diffBuilder = getDifferentialBuilder();
auto silType = getModule().Types.getLoweredLoadableType(
type, ResilienceExpansion::Minimal, getModule());
auto *buffer = diffBuilder.createAllocStack(loc, silType);
emitZeroIndirect(type, buffer, loc);
auto loaded = diffBuilder.emitLoadValueOperation(
loc, buffer, LoadOwnershipQualifier::Take);
diffBuilder.createDeallocStack(loc, buffer);
return loaded;
/// Materializes the symbolic tangent value `val` as a SIL value.
/// Zero values are emitted via `emitZeroDirect`, concrete values are returned
/// as-is, and aggregate values are reported as unsupported in forward mode.
SILValue materializeTangentDirect(AdjointValue val, SILLocation loc) {
<< "Materializing tangents for " << val << '\n');
switch (val.getKind()) {
case AdjointValueKind::Zero: {
auto zeroVal = emitZeroDirect(val.getSwiftType(), loc);
return zeroVal;
case AdjointValueKind::Aggregate:
"Tuples and structs are not supported in forward mode yet.");
case AdjointValueKind::Concrete:
return val.getConcreteValue();
/// Returns a SIL value for the tangent `val`. A concrete value is returned
/// directly; any other kind is handed to `materializeTangentDirect`.
SILValue materializeTangent(AdjointValue val, SILLocation loc) {
if (val.isConcrete()) {
<< "Materializing tangent: Value is concrete.\n");
return val.getConcreteValue();
LLVM_DEBUG(getADDebugStream() << "Materializing tangent: Value is "
"non-concrete. Materializing directly.\n");
return materializeTangentDirect(val, loc);
// Tangent buffer mapping
/// Records `tangentBuffer` as the tangent buffer of `originalBuffer` in block
/// `origBB`. Asserts that no buffer was recorded for that pair before.
void setTangentBuffer(SILBasicBlock *origBB, SILValue originalBuffer,
SILValue tangentBuffer) {
auto insertion =
bufferMap.try_emplace({origBB, originalBuffer}, tangentBuffer);
assert(insertion.second && "tangent buffer already exists.");
/// Returns a reference to the tangent buffer recorded for `originalBuffer` in
/// block `origBB`. Asserts that `originalBuffer` belongs to the original
/// function and that an entry already exists.
// Implemented with `try_emplace`, so in builds without assertions a missing
// entry is inserted rather than diagnosed.
SILValue &getTangentBuffer(SILBasicBlock *origBB, SILValue originalBuffer) {
assert(originalBuffer->getFunction() == original);
auto insertion = bufferMap.try_emplace({origBB, originalBuffer},
assert(!insertion.second && "tangent buffer should already exist");
return insertion.first->getSecond();
// Differential type calculations
/// Substitutes all replacement types of the given substitution map using the
/// tangent function's substitution map.
/// \param substMap the substitution map to remap.
/// \returns `substMap` with the differential's forwarding substitution map
///          applied.
SubstitutionMap remapSubstitutionMapInDifferential(SubstitutionMap substMap) {
return substMap.subst(getDifferential().getForwardingSubstitutionMap());
/// Remap any archetypes into the differential function's context.
/// A type containing archetypes is first mapped out of its current context;
/// the result is then mapped into the differential's context.
Type remapTypeInDifferential(Type ty) {
if (ty->hasArchetype())
return getDifferential().mapTypeIntoContext(ty->mapTypeOutOfContext());
return getDifferential().mapTypeIntoContext(ty);
/// Remap any archetypes into the differential function's context.
/// SIL-type counterpart of `remapTypeInDifferential`: a type containing
/// archetypes is mapped out of its context before being mapped into the
/// differential's context.
SILType remapSILTypeInDifferential(SILType ty) {
if (ty.hasArchetype())
return getDifferential().mapTypeIntoContext(ty.mapTypeOutOfContext());
return getDifferential().mapTypeIntoContext(ty);
/// Find the tangent space of a given canonical type.
/// \returns the result of `getAutoDiffAssociatedTangentSpace` on `type`;
///          callers in this class treat an empty optional as "no tangent
///          space".
Optional<VectorSpace> getTangentSpace(CanType type) {
return type->getAutoDiffAssociatedTangentSpace(
/// Assuming the given type conforms to `Differentiable` after remapping,
/// returns the associated tangent space SIL type.
/// \param type the original SIL type.
/// \returns a primitive SIL type built with `SILType::getPrimitiveType`.
SILType getRemappedTangentType(SILType type) {
return SILType::getPrimitiveType(
// Tangent value mapping
/// Get the tangent for an original value. The given value must be in the
/// original function.
/// This method first tries to find an entry in `tangentValueMap`. If an entry
/// doesn't exist, create a zero tangent.
/// \returns the existing or newly inserted map entry, by value.
// Note: the zero tangent argument is constructed (and allocated) before
// `try_emplace` checks for an existing entry.
AdjointValue getTangentValue(SILValue originalValue) {
assert(originalValue->getFunction() == original);
auto insertion = tangentValueMap.try_emplace(
originalValue, makeZeroTangentValue(
return insertion.first->getSecond();
/// Map the tangent value to the given original value.
/// \param origBB the original basic block of the value.
/// \param originalValue a value in the original function (asserted).
/// \param newTangentValue the tangent to record; its type is asserted to be
///        in the tangent space.
/// Asserts that no tangent has been recorded for `originalValue` yet, and
/// that `originalValue` is not a tuple-typed result of an `apply`.
void setTangentValue(SILBasicBlock *origBB, SILValue originalValue,
AdjointValue newTangentValue) {
if (auto *defInst = originalValue->getDefiningInstruction()) {
bool isTupleTypedApplyResult =
isa<ApplyInst>(defInst) && originalValue->getType().is<TupleType>();
assert(!isTupleTypedApplyResult &&
"Should not set tangent value for tuple-typed result from `apply` "
"instruction; use `destructure_tuple` on `apply` result and set "
"tangent value for `destructure_tuple` results instead.");
assert(originalValue->getFunction() == original);
LLVM_DEBUG(getADDebugStream() << "Adding tangent for " << originalValue);
// The tangent value must be in the tangent space.
assert(newTangentValue.getType() ==
auto insertion =
tangentValueMap.try_emplace(originalValue, newTangentValue);
auto inserted = insertion.second;
assert(inserted && "The tangent value should not already exist.");
// Tangent emission helpers
// Body of the clone-and-emit helper macro used by the handlers below. For an
// instruction kind INST it defines `visit<INST>Inst`, which first clones the
// instruction via `TypeSubstCloner` and then, if `differentialInfo` reports
// that the instruction should be differentiated, calls
// `emitTangentFor<INST>Inst`. The macro ends with the signature of that
// method, so the braces following a macro use form its body.
void visit##INST##Inst(INST##Inst *inst) { \
TypeSubstCloner::visit##INST##Inst(inst); \
if (differentialInfo.shouldDifferentiateInstruction(inst)) \
emitTangentFor##INST##Inst(inst); \
} \
void emitTangentFor##INST##Inst(INST##Inst *(ID))
/// Handle `begin_borrow` instruction.
/// Original: y = begin_borrow x
/// Tangent: tan[y] = borrow of tan[x], via `emitBeginBorrowOperation`
CLONE_AND_EMIT_TANGENT(BeginBorrow, bbi) {
auto &diffBuilder = getDifferentialBuilder();
auto loc = bbi->getLoc();
auto tanVal = materializeTangent(getTangentValue(bbi->getOperand()), loc);
auto tanValBorrow = diffBuilder.emitBeginBorrowOperation(loc, tanVal);
setTangentValue(bbi->getParent(), bbi,
// Handling of `end_borrow` (`ebi`): ends the borrow on the materialized
// tangent of the operand. No tangent value is recorded.
auto &diffBuilder = getDifferentialBuilder();
auto loc = ebi->getLoc();
auto tanVal = materializeTangent(getTangentValue(ebi->getOperand()), loc);
diffBuilder.emitEndBorrowOperation(loc, tanVal);
/// Handle `destroy_value` instruction.
/// Original: destroy_value x
/// Tangent: destroy of tan[x], via `emitDestroyValue`
CLONE_AND_EMIT_TANGENT(DestroyValue, dvi) {
auto &diffBuilder = getDifferentialBuilder();
auto loc = dvi->getLoc();
auto tanVal = materializeTangent(getTangentValue(dvi->getOperand()), loc);
diffBuilder.emitDestroyValue(loc, tanVal);
// Handling of `copy_value` (`cvi`): copies the materialized tangent of the
// operand and records the copy as the tangent of the result.
auto &diffBuilder = getDifferentialBuilder();
auto tan = getTangentValue(cvi->getOperand());
auto tanVal = materializeTangent(tan, cvi->getLoc());
auto tanValCopy = diffBuilder.emitCopyValueOperation(cvi->getLoc(), tanVal);
setTangentValue(cvi->getParent(), cvi,
/// Handle `load` instruction.
/// Original: y = load x
/// Tangent: tan[y] = load tan[x]
auto &diffBuilder = getDifferentialBuilder();
auto *bb = li->getParent();
auto loc = li->getLoc();
// The tangent buffer of the loaded address must already be registered.
auto tanBuf = getTangentBuffer(bb, li->getOperand());
// Load with the same ownership qualifier as the original `load`.
auto tanVal = diffBuilder.emitLoadValueOperation(
loc, tanBuf, li->getOwnershipQualifier());
setTangentValue(bb, li, makeConcreteTangentValue(tanVal));
/// Handle `load_borrow` instruction.
/// Original: y = load_borrow x
/// Tangent: tan[y] = load_borrow tan[x]
auto &diffBuilder = getDifferentialBuilder();
auto *bb = lbi->getParent();
auto loc = lbi->getLoc();
// The tangent buffer of the loaded address must already be registered.
auto tanBuf = getTangentBuffer(bb, lbi->getOperand());
auto tanVal = diffBuilder.emitLoadBorrowOperation(
loc, tanBuf);
setTangentValue(bb, lbi, makeConcreteTangentValue(tanVal));
/// Handle `store` instruction in the differential.
/// Original: store x to y
/// Tangent: store tan[x] to tan[y]
auto &diffBuilder = getDifferentialBuilder();
auto loc = si->getLoc();
// Source is a tangent value (materialized on demand); destination is a
// tangent buffer that must already be registered.
auto tanValSrc = materializeTangent(getTangentValue(si->getSrc()), loc);
auto &tanValDest = getTangentBuffer(si->getParent(), si->getDest());
loc, tanValSrc, tanValDest, si->getOwnershipQualifier());
/// Handle `store_borrow` instruction in the differential.
/// Original: store_borrow x to y
/// Tangent: store_borrow tan[x] to tan[y]
CLONE_AND_EMIT_TANGENT(StoreBorrow, sbi) {
auto &diffBuilder = getDifferentialBuilder();
auto loc = sbi->getLoc();
// Source is a tangent value (materialized on demand); destination is a
// tangent buffer that must already be registered.
auto tanValSrc = materializeTangent(getTangentValue(sbi->getSrc()), loc);
auto &tanValDest = getTangentBuffer(sbi->getParent(), sbi->getDest());
diffBuilder.createStoreBorrow(loc, tanValSrc, tanValDest);
/// Handle `copy_addr` instruction.
/// Original: copy_addr x to y
/// Tangent: copy_addr tan[x] to tan[y]
// Enter a generic context scope on the type converter, using the canonical
// generic signature of the differential when it has a generic environment.
auto *diffGenEnv = getDifferential().getGenericEnvironment();
auto diffGenSig = diffGenEnv
? diffGenEnv->getGenericSignature()->getCanonicalSignature()
: nullptr;
Lowering::GenericContextScope genericContextScope(
context.getTypeConverter(), diffGenSig);
// NOTE(review): builder is copied by value here, unlike most sibling
// handlers which bind `auto &` — confirm intentional.
auto diffBuilder = getDifferentialBuilder();
auto loc = cai->getLoc();
auto *bb = cai->getParent();
auto &tanSrc = getTangentBuffer(bb, cai->getSrc());
auto tanDest = getTangentBuffer(bb, cai->getDest());
// The take-of-source flag is forwarded from the original instruction.
diffBuilder.createCopyAddr(loc, tanSrc, tanDest, cai->isTakeOfSrc(),
/// Handle `unconditional_checked_cast_addr` instruction.
/// Original: unconditional_checked_cast_addr $X in x to $Y in y
/// Tangent: unconditional_checked_cast_addr $X.Tan in tan[x]
/// to $Y.Tan in tan[y]
CLONE_AND_EMIT_TANGENT(UnconditionalCheckedCastAddr, uccai) {
// NOTE(review): builder is copied by value here — confirm intentional.
auto diffBuilder = getDifferentialBuilder();
auto loc = uccai->getLoc();
auto *bb = uccai->getParent();
// Both tangent buffers must already be registered for this block.
auto &tanSrc = getTangentBuffer(bb, uccai->getSrc());
auto tanDest = getTangentBuffer(bb, uccai->getDest());
// The source formal type is taken from the tangent source buffer's type.
loc, tanSrc, tanSrc->getType().getASTType(), tanDest,
/// Handle `begin_access` instruction (and do differentiability checks).
/// Original: y = begin_access x
/// Tangent: tan[y] = begin_access tan[x]
CLONE_AND_EMIT_TANGENT(BeginAccess, bai) {
// Check for non-differentiable writes.
// A `modify` access whose source is a `global_addr` or a `project_box`
// is diagnosed as non-differentiable and sets `errorOccurred`.
if (bai->getAccessKind() == SILAccessKind::Modify) {
if (auto *gai = dyn_cast<GlobalAddrInst>(bai->getSource())) {
context.emitNondifferentiabilityError(bai, invoker,
errorOccurred = true;
if (auto *pbi = dyn_cast<ProjectBoxInst>(bai->getSource())) {
context.emitNondifferentiabilityError(bai, invoker,
errorOccurred = true;
auto &diffBuilder = getDifferentialBuilder();
auto *bb = bai->getParent();
auto tanSrc = getTangentBuffer(bb, bai->getSource());
// Mirror the original access: same kind, enforcement and flags.
auto *tanDest = diffBuilder.createBeginAccess(
bai->getLoc(), tanSrc, bai->getAccessKind(), bai->getEnforcement(),
bai->hasNoNestedConflict(), bai->isFromBuiltin());
setTangentBuffer(bb, bai, tanDest);
/// Handle `end_access` instruction.
/// Original: end_access x
/// Tangent: end_access tan[x]
auto &diffBuilder = getDifferentialBuilder();
auto *bb = eai->getParent();
auto loc = eai->getLoc();
auto tanSrc = getTangentBuffer(bb, eai->getOperand());
// The aborting flag is forwarded from the original instruction.
diffBuilder.createEndAccess(loc, tanSrc, eai->isAborting());
/// Handle `alloc_stack` instruction.
/// Original: y = alloc_stack $T
/// Tangent: tan[y] = alloc_stack $T.Tangent
auto &diffBuilder = getDifferentialBuilder();
// Allocate a buffer of the remapped tangent type of the original element
// type.
auto *mappedAllocStackInst = diffBuilder.createAllocStack(
asi->getLoc(), getRemappedTangentType(asi->getElementType()),
// Registered directly in `bufferMap` with `try_emplace`, so an existing
// entry is kept rather than asserted on (unlike `setTangentBuffer`).
bufferMap.try_emplace({asi->getParent(), asi},
/// Handle `dealloc_stack` instruction.
/// Original: dealloc_stack x
/// Tangent: dealloc_stack tan[x]
CLONE_AND_EMIT_TANGENT(DeallocStack, dsi) {
auto &diffBuilder = getDifferentialBuilder();
// The tangent buffer of the operand must already be registered.
auto tanBuf = getTangentBuffer(dsi->getParent(), dsi->getOperand());
diffBuilder.createDeallocStack(dsi->getLoc(), tanBuf);
/// Handle `destroy_addr` instruction.
/// Original: destroy_addr x
/// Tangent: destroy_addr tan[x]
CLONE_AND_EMIT_TANGENT(DestroyAddr, dai) {
auto &diffBuilder = getDifferentialBuilder();
// The tangent buffer of the operand must already be registered.
auto tanBuf = getTangentBuffer(dai->getParent(), dai->getOperand());
diffBuilder.createDestroyAddr(dai->getLoc(), tanBuf);
/// Handle `struct` instruction.
/// Original: y = struct $T (x0, x1, x2, ...)
/// Tangent: tan[y] = struct $T.Tangent (tan[x0], tan[x1], tan[x2], ...)
auto &diffBuilder = getDifferentialBuilder();
// Tangents of the struct's elements, in element order.
SmallVector<SILValue, 4> tangentElements;
for (auto elem : si->getElements())
// Build the tangent struct with the remapped tangent type of the original
// struct type and record it as the tangent of the result.
auto tanExtract = diffBuilder.createStruct(
si->getLoc(), getRemappedTangentType(si->getType()), tangentElements);
setTangentValue(si->getParent(), si, makeConcreteTangentValue(tanExtract));
/// Handle `struct_extract` instruction.
/// Original: y = struct_extract x, #field
/// Tangent: tan[y] = struct_extract tan[x], #field'
/// ^~~~~~~
/// field in tangent space corresponding to #field
/// Asserts that the extracted field is not `@noDerivative`. If no field is
/// found in the tangent space, `errorOccurred` is set.
CLONE_AND_EMIT_TANGENT(StructExtract, sei) {
assert(!sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
"`struct_extract` with `@noDerivative` field should not be "
"differentiated; activity analysis should not marked as varied.");
// NOTE(review): builder is copied by value here, unlike most sibling
// handlers which bind `auto &` — confirm intentional.
auto diffBuilder = getDifferentialBuilder();;
auto tangentVectorTy =
auto *tangentVectorDecl =
// Find the corresponding field in the tangent space.
VarDecl *tanField = nullptr;
// If the tangent space is the original struct, then field is the same.
if (tangentVectorDecl == sei->getStructDecl())
tanField = sei->getField();
// Otherwise, look up the field by name.
else {
auto tanFieldLookup =
if (tanFieldLookup.empty()) {
sei, invoker,
errorOccurred = true;
// The first lookup result is used; `cast` asserts it is a `VarDecl`.
tanField = cast<VarDecl>(tanFieldLookup.front());
// Emit tangent `struct_extract`.
auto tanStruct =
materializeTangent(getTangentValue(sei->getOperand()), sei->getLoc());
auto tangentInst =
diffBuilder.createStructExtract(sei->getLoc(), tanStruct, tanField);
// Update tangent value mapping for `struct_extract` result.
auto tangentResult = makeConcreteTangentValue(tangentInst);
setTangentValue(sei->getParent(), sei, tangentResult);
/// Handle `struct_element_addr` instruction.
/// Original: y = struct_element_addr x, #field
/// Tangent: tan[y] = struct_element_addr tan[x], #field'
/// ^~~~~~~
/// field in tangent space corresponding to #field
/// Asserts that the projected field is not `@noDerivative`. If no field is
/// found in the tangent space, `errorOccurred` is set.
CLONE_AND_EMIT_TANGENT(StructElementAddr, seai) {
assert(!seai->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
"`struct_element_addr` with `@noDerivative` field should not be "
"differentiated; activity analysis should not marked as varied.");
// NOTE(review): builder is copied by value here — confirm intentional.
auto diffBuilder = getDifferentialBuilder();
auto *bb = seai->getParent();
auto tangentVectorTy =
auto *tangentVectorDecl =
// Find the corresponding field in the tangent space.
VarDecl *tanField = nullptr;
// If the tangent space is the original struct, then field is the same.
if (tangentVectorDecl == seai->getStructDecl())
tanField = seai->getField();
// Otherwise, look up the field by name.
else {
auto tanFieldLookup =
if (tanFieldLookup.empty()) {
seai, invoker,
errorOccurred = true;
// The first lookup result is used; `cast` asserts it is a `VarDecl`.
tanField = cast<VarDecl>(tanFieldLookup.front());
// Emit tangent `struct_element_addr`.
auto tanOperand = getTangentBuffer(bb, seai->getOperand());
auto tangentInst = diffBuilder.createStructElementAddr(
seai->getLoc(), tanOperand, tanField);
// Update tangent buffer map for `struct_element_addr`.
setTangentBuffer(bb, seai, tangentInst);
/// Handle `tuple` instruction.
/// Original: y = tuple (x0, x1, x2, ...)
/// Tangent: tan[y] = tuple (tan[x0], tan[x1], tan[x2], ...)
// NOTE(review): builder is copied by value here, unlike most sibling handlers
// which bind `auto &` — confirm intentional.
auto diffBuilder = getDifferentialBuilder();
// Get the tangents of all the tuple elements.
SmallVector<SILValue, 8> tangentTupleElements;
for (auto elem : ti->getElements()) {
materializeTangent(getTangentValue(elem), ti->getLoc()));
// Emit the instruction and add the tangent mapping.
auto tanTuple = diffBuilder.createTuple(ti->getLoc(), tangentTupleElements);
setTangentValue(ti->getParent(), ti, makeConcreteTangentValue(tanTuple));
/// Handle `tuple_extract` instruction.
/// Original: y = tuple_extract x, <n>
/// Tangent: tan[y] = tuple_extract tan[x], <n'>
/// ^~~~
/// tuple tangent space index corresponding to n
CLONE_AND_EMIT_TANGENT(TupleExtract, tei) {
auto &diffBuilder = getDifferentialBuilder();
auto loc = tei->getLoc();
auto origTupleTy = tei->getOperand()->getType().castTo<TupleType>();
// Compute the tangent tuple index by scanning the original elements that
// precede the extracted field and consulting their tangent space
// (presumably counting only elements that have one — TODO confirm).
unsigned tanIndex = 0;
for (unsigned i : range(tei->getFieldNo())) {
if (getTangentSpace(
auto tanType = getRemappedTangentType(tei->getType());
auto tanSource = materializeTangent(
getTangentValue(tei->getOperand()), loc);
SILValue tanBuf;
// If the tangent buffer of the source does not have a tuple type, then
// it must represent a "single element tuple type". Use it directly.
if (!tanSource->getType().is<TupleType>()) {
setTangentValue(tei->getParent(), tei,
} else {
tanBuf = diffBuilder.createTupleExtract(loc, tanSource, tanIndex, tanType);
// NOTE(review): the extracted value is registered in `bufferMap` even
// though `tuple_extract` yields a value rather than an address — confirm.
bufferMap.try_emplace({tei->getParent(), tei}, tanBuf);
/// Handle `tuple_element_addr` instruction.
/// Original: y = tuple_element_addr x, <n>
/// Tangent: tan[y] = tuple_element_addr tan[x], <n'>
/// where <n'> is the tuple tangent space index corresponding to <n>.
CLONE_AND_EMIT_TANGENT(TupleElementAddr, teai) {
auto &diffBuilder = getDifferentialBuilder();
auto origTupleTy = teai->getOperand()->getType().castTo<TupleType>();
// `tanIndex` is the element index used on the tangent tuple. It is derived by
// walking the original elements that precede the projected field.
// (presumably only elements that have a tangent space are counted — the loop
// body is not fully visible here; TODO confirm)
unsigned tanIndex = 0;
for (unsigned i : range(teai->getFieldNo())) {
if (getTangentSpace(
auto tanType = getRemappedTangentType(teai->getType());
// Address operands are tracked as tangent buffers, keyed by original block.
auto tanSource = getTangentBuffer(teai->getParent(), teai->getOperand());
SILValue tanBuf;
// If the tangent buffer of the source does not have a tuple type, then
// it must represent a "single element tuple type". Use it directly.
if (!tanSource->getType().is<TupleType>()) {
tanBuf = tanSource;
} else {
tanBuf = diffBuilder.createTupleElementAddr(
teai->getLoc(), tanSource, tanIndex, tanType);
// Record the projected address as the tangent buffer of the original
// `tuple_element_addr`; `try_emplace` keeps an already-existing entry.
bufferMap.try_emplace({teai->getParent(), teai}, tanBuf);
/// Handle `destructure_tuple` instruction.
/// Original: (y0, y1, ...) = destructure_tuple x
/// Tangent: (tan[y0], tan[y1], ...) = destructure_tuple tan[x]
///
/// The tangent of the tuple operand is materialized and destructured in the
/// differential; each tangent element is then recorded as the tangent value of
/// the original result at the same result index.
CLONE_AND_EMIT_TANGENT(DestructureTuple, dti) {
auto &diffBuilder = getDifferentialBuilder();
auto *bb = dti->getParent();
auto loc = dti->getLoc();
// Collect the active original results; the assertion below requires that at
// least one exists.
SmallVector<SILValue, 2> activeOrigResults;
bool hasActiveResult = false;
for (auto result : dti->getResults()) {
if (activityInfo.isActive(result, getIndices())) {
hasActiveResult = true;
assert(!activeOrigResults.empty() &&
"original 'destructure_tuple' should have at least one active "
auto tanTuple =
materializeTangent(getTangentValue(dti->getOperand()), loc);
auto *tupleElements = diffBuilder.createDestructureTuple(loc, tanTuple);
// Results are paired by position: tangent result `i` maps to original
// result `i`.
// NOTE(review): this indexes `dti` with the tangent instruction's result
// count, i.e. it assumes both instructions have the same number of results —
// confirm this holds when some elements have no tangent.
for (auto i : range(tupleElements->getNumResults())) {
auto origElem = dti->getResult(i);
auto tanElem = tupleElements->getResult(i);
setTangentValue(bb, origElem, makeConcreteTangentValue(tanElem));
/// Handle `apply` instruction.
/// Original: y = apply f(x0, x1, ...)
/// Tangent: tan[y] = apply diff_f(tan[x0], tan[x1], ...)
///
/// The callee differential is read from the differential struct element
/// associated with `ai`, optionally reabstracted through a thunk, and applied
/// to the tangents of the original arguments.
///
/// \param ai the original `apply` instruction.
/// \param actualIndices indices whose `source` selects the original result
///        that receives the tangent.
/// \param originalDifferentialType the differential type to reabstract to when
///        it differs from the remapped type of the stored differential.
void emitTangentForApplyInst(ApplyInst *ai,
const SILAutoDiffIndices &actualIndices,
CanSILFunctionType originalDifferentialType) {
auto *bb = ai->getParent();
auto loc = ai->getLoc();
auto &diffBuilder = getDifferentialBuilder();
// Get the differential value.
auto *field = differentialInfo.lookUpLinearMapDecl(ai);
SILValue differential = getDifferentialStructElement(bb, field);
auto differentialType = remapSILTypeInDifferential(differential->getType())
// Get the differential arguments.
SmallVector<SILValue, 8> diffArgs;
// Indirect results come first: pass their tangent buffers.
for (auto indRes : ai->getIndirectSILResults())
diffArgs.push_back(getTangentBuffer(bb, indRes));
auto paramArgs = ai->getArgumentsWithoutIndirectResults();
// Get the tangent value of the original arguments.
for (auto i : indices(paramArgs)) {
auto origArg = paramArgs[i];
// If the argument is not active:
// - Skip the element, if it is not differentiable.
// - Otherwise, add a zero value to that location.
if (!activityInfo.isActive(origArg, getIndices())) {
auto origCalleeType = ai->getSubstCalleeType();
if (!origCalleeType->isDifferentiable())
auto actualOrigCalleeIndices =
if (actualOrigCalleeIndices->contains(i)) {
SILValue tanParam;
// Object-typed arguments get a direct zero value; address-typed arguments
// get a freshly allocated stack buffer of the remapped tangent type.
if (origArg->getType().isObject()) {
tanParam = emitZeroDirect(
getRemappedTangentType(origArg->getType()).getASTType(), loc);
} else {
tanParam = diffBuilder.createAllocStack(
loc, getRemappedTangentType(origArg->getType()));
getRemappedTangentType(origArg->getType()).getASTType(), tanParam,
// Otherwise, if the argument is active, handle the argument normally by
// getting its tangent value.
else {
SILValue tanParam;
// Object-typed arguments use a materialized tangent value; address-typed
// arguments use their tangent buffer.
if (origArg->getType().isObject()) {
tanParam = materializeTangent(getTangentValue(origArg), loc);
} else {
tanParam = getTangentBuffer(ai->getParent(), origArg);
if (errorOccurred)
// If callee differential was reabstracted in JVP, reabstract the callee
// differential.
if (!differentialType->isEqual(originalDifferentialType)) {
SILOptFunctionBuilder fb(context.getTransform());
auto *thunk = getOrCreateReabstractionThunk(
fb, context.getModule(), loc, &getDifferential(),
differentialType, originalDifferentialType);
auto *thunkRef = diffBuilder.createFunctionRef(loc, thunk);
// Partially apply the thunk to the stored differential so that the
// resulting closure is what gets called below.
differential = diffBuilder.createPartialApply(
loc, thunkRef,
{differential}, differentialType->getCalleeConvention());
// Call the differential.
auto *differentialCall = diffBuilder.createApply(
loc, differential, SubstitutionMap(), diffArgs,
/*isNonThrowing*/ false);
// The differential value is destroyed right after the call.
diffBuilder.emitDestroyValueOperation(loc, differential);
assert(differentialCall->getNumResults() == 1 &&
"Expected differential to return one result");
// Get the original results of the `apply` instructions.
SmallVector<SILValue, 8> origDirectResults;
forEachApplyDirectResult(ai, [&](SILValue directResult) {
SmallVector<SILValue, 8> origAllResults;
collectAllActualResultsInTypeOrder(ai, origDirectResults, origAllResults);
// The result to differentiate is selected by `actualIndices.source`.
auto origResult = origAllResults[actualIndices.source];
// Get the differential results of the `apply` instructions.
SmallVector<SILValue, 8> differentialDirectResults;
forEachApplyDirectResult(differentialCall, [&](SILValue directResult) {
SmallVector<SILValue, 8> differentialAllResults;
auto differentialResult = differentialAllResults.front();
// Add tangent for original result.
if (origResult->getType().isObject()) {
if (!origResult->getType().is<TupleType>()) {
setTangentValue(bb, origResult,
} else if (auto *dti = getSingleDestructureTupleUser(ai)) {
// For a tuple-typed result consumed by a single `destructure_tuple`, the
// tangent is attached to the active destructured result; the assertion
// enforces that there is at most one such result.
bool notSetValue = true;
for (auto result : dti->getResults()) {
if (activityInfo.isActive(result, getIndices())) {
assert(notSetValue &&
"This was incorrectly set, should only have one active "
"result from the tuple.");
notSetValue = false;
setTangentValue(bb, result,
/// Generate a `return` instruction in the current differential basic block.
///
/// The returned value is assembled from `retElts`: either the materialized
/// tangent of the single active direct result, or a zero tangent when no
/// direct result is active.
void emitReturnInstForDifferential() {
auto &differential = getDifferential();
auto diffLoc = differential.getLocation();
auto &diffBuilder = getDifferentialBuilder();
SmallVector<SILValue, 2> activeResults;
// This vector will contain all the materialized return elements.
SmallVector<SILValue, 8> retElts;
SmallVector<SILValue, 2> originalResults;
collectAllDirectResultsInTypeOrder(*original, originalResults);
// Materializes the return element corresponding to the result
// `resultIndex` into the `retElts` vector.
auto addActiveResult = [&](unsigned resultIndex) -> void {
auto origResult = originalResults[resultIndex];
assert(origResult->getType().isObject() &&
"Should only be handling direct results for 'return' "
if (activityInfo.isActive(origResult, getIndices())) {
// Create an array of the direct tangent values of the original results.
for (auto i : range(originalResults.size()))
// At most one active direct result is supported.
assert(activeResults.size() <= 1);
if (activeResults.empty() && !originalResults.empty()) {
// Create zero tangent value for direct result.
auto origResult = originalResults[getIndices().source];
assert(origResult->getType().isObject() &&
"Should only be handling direct results for 'return' "
auto zeroType = origResult->getType().getASTType();
auto zero =
} else if (!activeResults.empty()) {
// Materialize the tangent of the single active result at the differential's
// location.
auto diffVal = getTangentValue(activeResults.front());
auto val = materializeTangent(diffVal, diffLoc);
diffLoc, joinElements(retElts, diffBuilder, diffLoc));
/// Set up the differential function. This includes:
/// - Creating all differential blocks.
/// - Creating differential entry block arguments based on the function type.
/// - Creating tangent value mapping for original/differential parameters.
/// - Checking for unvaried result and emitting related warnings.
void prepareForDifferentialGeneration() {
// Create differential blocks and arguments.
auto *diffGenEnv = getDifferential().getGenericEnvironment();
// Canonical generic signature of the differential, or null when the
// differential has no generic environment.
auto diffGenSig = diffGenEnv
? diffGenEnv->getGenericSignature()->getCanonicalSignature()
: nullptr;
auto &differential = getDifferential();
auto *origEntry = original->getEntryBlock();
for (auto &origBB : *original) {
// One differential block is created per original block and remembered in
// `diffBBMap`.
auto *diffBB = differential.createBasicBlock();
diffBBMap.insert({&origBB, diffBB});
Lowering::GenericContextScope genericContextScope(
context.getTypeConverter(), diffGenSig);
auto diffStructLoweredType = remapSILTypeInDifferential(
// If the BB is the original entry, then the differential block that we
// just created must be the differential function's entry. Create
// differential entry arguments and continue.
if (&origBB == origEntry) {
// The differential struct is the last entry argument.
auto *lastArg = diffBB->getArguments().back();
assert(lastArg->getType() == diffStructLoweredType);
differentialStructArguments[&origBB] = lastArg;
// Debug dump: mark each original instruction with whether it will be
// differentiated.
auto &s = getADDebugStream()
<< "Original bb" + std::to_string(origBB.getDebugID())
<< ": To differentiate or not to differentiate?\n";
for (auto &inst : origBB) {
s << (differentialInfo.shouldDifferentiateInstruction(&inst)
? "[∂] " : "[ ] ")
<< inst;
assert(diffBBMap.size() == 1 &&
"Can only currently handle single basic block functions");
// The differential function has type:
// (arg0', ..., argn', entry_df_struct) -> result'.
auto diffParamArgs =
assert(diffParamArgs.size() ==
auto origParamArgs = original->getArgumentsWithoutIndirectResults();
// TODO(TF-788): Re-enable non-varied result warning.
// Check if result is not varied.
SmallVector<SILValue, 8> origFormalResults;
collectAllFormalResultsInTypeOrder(*original, origFormalResults);
auto origResult = origFormalResults[getIndices().source];
// Emit warning if original result is not varied, because it will always
// have a zero derivative.
if (!activityInfo.isVaried(origResult, getIndices().parameters)) {
// Emit fixit if original result has a valid source location.
auto startLoc = origResult.getLoc().getStartSourceLoc();
auto endLoc = origResult.getLoc().getEndSourceLoc();
if (startLoc.isValid() && endLoc.isValid()) {
// The fix-it wraps the result expression in `withoutDerivative(at: ...)`.
context.diagnose(startLoc, diag::autodiff_nonvaried_result_fixit)
.fixItInsert(startLoc, "withoutDerivative(at:")
.fixItInsertAfter(endLoc, ")");
// Initialize tangent mapping for parameters.
auto diffParamsIt = getIndices().parameters->begin();
for (auto index : range(diffParamArgs.size())) {
auto *diffArg = diffParamArgs[index];
auto *origArg = origParamArgs[*diffParamsIt];
// Address-typed differential arguments become tangent buffers; object-typed
// ones become concrete tangent values.
if (diffArg->getType().isAddress()) {
setTangentBuffer(origEntry, origArg, diffArg);
} else {
origEntry, origArg, makeConcreteTangentValue(diffArg));
// NOTE(review): the message below says "original result" although `origArg`
// is an original parameter — confirm and reword the message if so.
<< "Assigned parameter " << *diffArg
<< " as the tangent of original result " << *origArg);
// Initialize tangent mapping for indirect results.
auto origIndResults = original->getIndirectResults();
auto diffIndResults = differential.getIndirectResults();
assert(origIndResults.size() == diffIndResults.size());
// The same differential indirect result is registered as the tangent buffer
// of the original indirect result in every original block.
for (auto &origBB : *original)
for (auto i : indices(diffIndResults))
setTangentBuffer(&origBB, origIndResults[i], diffIndResults[i]);
/// Construct a JVP emitter that clones `original` into `jvp`.
///
/// The initializer list computes activity information for the indices of
/// `attr`, builds the differential linear map info from it, and sets up the
/// local allocation builder on the differential function.
explicit JVPEmitter(ADContext &context, SILFunction *original,
SILDifferentiableAttr *attr, SILFunction *jvp,
DifferentiationInvoker invoker)
: TypeSubstCloner(*jvp, *original, getSubstitutionMap(original, jvp)),
context(context), original(original), attr(attr), jvp(jvp),
invoker(invoker), activityInfo(getActivityInfo(
context, original, attr->getIndices(), jvp)),
differentialInfo(context, AutoDiffLinearMapKind::Differential, original,
jvp, attr->getIndices(), activityInfo),
// NOTE(review): `differentialInfo` is passed by address in the next
// initializer, so it must already be initialized at that point; this relies
// on the member declaration order, which is not visible here — confirm.
context, original, attr, &differentialInfo))),
diffLocalAllocBuilder(getDifferential()) {
// Create empty differential function.
//
// Builds the differential's SIL function type from the original function type
// and the indices of `attr`, then creates a new function with hidden linkage
// and returns it.
static SILFunction *createEmptyDifferential(ADContext &context,
SILFunction *original,
SILDifferentiableAttr *attr,
LinearMapInfo *linearMapInfo) {
auto &module = context.getModule();
auto origTy = original->getLoweredFunctionType();
auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());
// RAII that pushes the original function's generic signature to
// `module.Types` so that calls to `module.Types.getTypeLowering()` below
// will know the original function's generic parameter types.
Lowering::GenericContextScope genericContextScope(
module.Types, origTy->getGenericSignature());
// Parameters of the differential are:
// - the tangent values of the wrt parameters.
// - the differential struct for the original entry.
// Result of the differential is in the tangent space of the original
// result.
SmallVector<SILParameterInfo, 8> dfParams;
SmallVector<SILResultInfo, 8> dfResults;
auto origParams = origTy->getParameters();
auto indices = attr->getIndices();
// Add differential results.
auto origResInfo = origTy->getResults()[indices.source];
// Add differential parameters for the requested wrt parameters.
for (auto i : indices.parameters->getIndices()) {
auto origParam = origParams[i];
// Accept a differential struct in the differential parameter list. This is
// the returned differential's closure context.
auto *origEntry = original->getEntryBlock();
auto *dfStruct = linearMapInfo->getLinearMapStruct(origEntry);
auto dfStructType =
// The struct is appended last and is taken as a direct, owned parameter.
dfParams.push_back({dfStructType, ParameterConvention::Direct_Owned});
Mangle::ASTMangler mangler;
auto diffName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffLinearMapKind::Differential,
auto diffGenericSig = getDerivativeGenericSignature(attr, original);
// The differential has a generic environment only when a derivative generic
// signature exists.
auto *diffGenericEnv =
diffGenericSig ? diffGenericSig->getGenericEnvironment() : nullptr;
// ExtInfo, coroutine kind and callee convention are carried over from the
// original function type.
auto diffType = SILFunctionType::get(
diffGenericSig, origTy->getExtInfo(), origTy->getCoroutineKind(),
origTy->getCalleeConvention(), dfParams, {}, dfResults, None,
SILOptFunctionBuilder fb(context.getTransform());
// The generated tangent linkage is set to Hidden because generated tangents
// are never called cross-module.
auto linkage = SILLinkage::Hidden;
auto *differential = fb.createFunction(
linkage, diffName, diffType, diffGenericEnv, original->getLocation(),
original->isBare(), IsNotTransparent, original->isSerialized(),
new (module) SILDebugScope(original->getLocation(), differential));
return differential;
/// Run JVP generation. Returns true on error.
///
/// Creates the JVP entry block, clones the body of the original function into
/// it, and reports whether any error was recorded in `errorOccurred`.
bool run() {
<< "Cloning original @" << original->getName()
<< " to jvp @" << jvp->getName() << '\n');
// Create JVP and differential entry and arguments.
auto *entry = jvp->createBasicBlock();
// Clone.
SmallVector<SILValue, 4> entryArgs(entry->getArguments().begin(),
cloneFunctionBody(original, entry, entryArgs);
// If errors occurred, back out.
if (errorOccurred)
return true;
// Debug-only dumps of the generated JVP and differential.
LLVM_DEBUG(getADDebugStream() << "Generated JVP for "
<< original->getName() << ":\n" << *jvp);
LLVM_DEBUG(getADDebugStream() << "Generated differential for "
<< original->getName() << ":\n" << getDifferential());
return errorOccurred;
/// Post-processing hook invoked for each cloned instruction. Checks
/// `errorOccurred` first, then forwards to
/// `SILClonerWithScopes::postProcess`.
void postProcess(SILInstruction *orig, SILInstruction *cloned) {
if (errorOccurred)
SILClonerWithScopes::postProcess(orig, cloned);
/// Remap original basic blocks.
///
/// Looks up the JVP block for the original block `bb` in `BBMap`.
SILBasicBlock *remapBasicBlock(SILBasicBlock *bb) {
// `operator[]` is used for the lookup.
// NOTE(review): on DenseMap-like containers this inserts a null entry when
// `bb` is absent — confirm `bb` is always mapped before this is called.
auto *jvpBB = BBMap[bb];
return jvpBB;
/// General visitor for all instructions. If any error is emitted by previous
/// visits, bail out.
///
/// For instructions that should be differentiated, debug builds additionally
/// print the instructions that were emitted into the differential.
void visit(SILInstruction *inst) {
// NOTE(review): the builder is taken by value here, whereas other methods
// bind it with `auto &`. The insertion points read below come from this local
// object — confirm the copy is intended.
auto diffBuilder = getDifferentialBuilder();
if (errorOccurred)
if (differentialInfo.shouldDifferentiateInstruction(inst)) {
LLVM_DEBUG(getADDebugStream() << "JVPEmitter visited:\n[ORIG]" << *inst);
#ifndef NDEBUG
// Remember the instruction just before the insertion point so that the range
// of newly emitted differential instructions can be printed afterwards.
auto beforeInsertion = std::prev(diffBuilder.getInsertionPoint());
auto &s = llvm::dbgs() << "[TAN] Emitted in differential:\n";
auto afterInsertion = diffBuilder.getInsertionPoint();
for (auto it = ++beforeInsertion; it != afterInsertion; ++it)
s << *it;
} else {
/// Fallback visitor: emits a non-differentiability error for `inst` and
/// records the failure in `errorOccurred`.
void visitSILInstruction(SILInstruction *inst) {
context.emitNondifferentiabilityError(inst, invoker,
errorOccurred = true;
/// Visit the instructions of original block `bb`. Before doing so, the
/// differential struct argument of the corresponding differential block is
/// destructured and its elements are registered for `bb`.
void visitInstructionsInBlock(SILBasicBlock *bb) {
// Destructure the differential struct to get the elements.
auto &diffBuilder = getDifferentialBuilder();
auto diffLoc = getDifferential().getLocation();
auto *diffBB = diffBBMap.lookup(bb);
// The differential struct is the last argument of the differential block.
auto *mainDifferentialStruct = diffBB->getArguments().back();
auto *dsi = diffBuilder.createDestructureStruct(
diffLoc, mainDifferentialStruct);
initializeDifferentialStructElements(bb, dsi->getResults());
// If an `apply` has active results or active inout parameters, replace it
// with an `apply` of its JVP.
//
// Outline of the steps visible below:
// - decide whether the `apply` needs differentiation at all;
// - reject active `inout` arguments and multiple active results;
// - obtain the JVP value, either by extracting it from a `@differentiable`
//   callee or by forming a `differentiable_function` and extracting from it;
// - call the JVP with the remapped original arguments and map the original
//   result to the JVP's original-result part;
// - reabstract the returned differential if needed and emit the tangent.
void visitApplyInst(ApplyInst *ai) {
// If the function should not be differentiated or it's the array literal
// initialization intrinsic, just do standard cloning.
if (!differentialInfo.shouldDifferentiateApplyInst(ai) ||
isArrayLiteralIntrinsic(ai)) {
LLVM_DEBUG(getADDebugStream() << "No active results:\n" << *ai << '\n');
// Check and reject functions with active inout arguments. It's not yet
// supported.
auto paramInfos = ai->getSubstCalleeConv().getParameters();
auto paramArgs = ai->getArgumentsWithoutIndirectResults();
for (unsigned i : swift::indices(paramInfos)) {
if (paramInfos[i].isIndirectInOut() &&
activityInfo.isActive(paramArgs[i], getIndices())) {
context.emitNondifferentiabilityError(ai, invoker,
errorOccurred = true;
LLVM_DEBUG(getADDebugStream() << "JVP-transforming:\n" << *ai << '\n');
// Get the minimal parameter and result indices required for differentiating
// this `apply`.
SmallVector<SILValue, 4> allResults;
SmallVector<unsigned, 8> activeParamIndices;
SmallVector<unsigned, 8> activeResultIndices;
collectMinimalIndicesForFunctionCall(ai, getIndices(), activityInfo,
allResults, activeParamIndices,
assert(!activeParamIndices.empty() && "Parameter indices cannot be empty");
assert(!activeResultIndices.empty() && "Result indices cannot be empty");
LLVM_DEBUG(auto &s = getADDebugStream() << "Active indices: params={";
interleave(activeParamIndices.begin(), activeParamIndices.end(),
[&s](unsigned i) { s << i; }, [&s] { s << ", "; });
s << "}, results={"; interleave(
activeResultIndices.begin(), activeResultIndices.end(),
[&s](unsigned i) { s << i; }, [&s] { s << ", "; });
s << "}\n";);
// FIXME: We don't support multiple active results yet.
if (activeResultIndices.size() > 1) {
ai, invoker, diag::autodiff_expression_not_differentiable_note);
errorOccurred = true;
// Form expected indices, assuming there's only one result.
SILAutoDiffIndices indices(
getASTContext(), ai->getArgumentsWithoutIndirectResults().size(),
// Emit the JVP.
auto loc = ai->getLoc();
auto &builder = getBuilder();
// Note: this local `original` is the remapped callee value. The emitter's
// original function is referred to as `this->original` further below.
auto original = getOpValue(ai->getCallee());
SILValue jvpValue;
// If functionSource is a `@differentiable` function, just extract it.
auto originalFnTy = original->getType().castTo<SILFunctionType>();
if (originalFnTy->isDifferentiable()) {
auto paramIndices = originalFnTy->getDifferentiationParameterIndices();
// Every parameter we need to differentiate with respect to must be among the
// callee's own differentiation parameters; otherwise an error is emitted.
for (auto i : indices.parameters->getIndices()) {
if (!paramIndices->contains(i)) {
context.emitNondifferentiabilityError(original, invoker,
errorOccurred = true;
auto borrowedDiffFunc = builder.emitBeginBorrowOperation(loc, original);
jvpValue = builder.createDifferentiableFunctionExtract(
loc, NormalDifferentiableFunctionTypeComponent::JVP,
// Copy the extracted JVP so that it can be used independently of the borrow.
jvpValue = builder.emitCopyValueOperation(loc, jvpValue);
// If JVP has not yet been found, emit a `differentiable_function`
// instruction on the remapped original function operand and
// a `differentiable_function_extract` instruction to get the JVP.
// The `differentiable_function` instruction will be canonicalized during
// the transform main loop.
if (!jvpValue) {
// FIXME: Handle indirect differentiation invokers. This may require some
// redesign: currently, each original function + attribute pair is mapped
// only to one invoker.
DifferentiationInvoker indirect(ai, attr);
// `try_emplace` leaves an existing entry untouched, so the assignment below
// is what overwrites a previously registered invoker for this pair.
auto insertion =
context.getInvokers().try_emplace({this->original, attr}, indirect);
auto &invoker = insertion.first->getSecond();
invoker = indirect;
// If the original `apply` instruction has a substitution map, then the
// applied function is specialized.
// In the JVP, specialization is also necessary for parity. The original
// function operand is specialized with a remapped version of same
// substitution map using an argument-less `partial_apply`.
if (ai->getSubstitutionMap().empty()) {
original = builder.emitCopyValueOperation(loc, original);
} else {
auto substMap = getOpSubstitutionMap(ai->getSubstitutionMap());
auto jvpPartialApply = getBuilder().createPartialApply(
ai->getLoc(), original, substMap, {},
original = jvpPartialApply;
// Check and diagnose non-differentiable original function type.
// The lambda returns true when an error was diagnosed.
auto diagnoseNondifferentiableOriginalFunctionType =
[&](CanSILFunctionType origFnTy) {
// Check and diagnose non-differentiable arguments.
for (unsigned paramIndex : range(originalFnTy->getNumParameters())) {
if (indices.isWrtParameter(paramIndex) &&
.isDifferentiable(getModule())) {
ai->getArgumentsWithoutIndirectResults()[paramIndex], invoker,
errorOccurred = true;
return true;
// Check and diagnose non-differentiable results.
if (!originalFnTy->getResults()[indices.source]
.isDifferentiable(getModule())) {
original, invoker, diag::autodiff_nondifferentiable_result);
errorOccurred = true;
return true;
return false;
if (diagnoseNondifferentiableOriginalFunctionType(originalFnTy))
auto *diffFuncInst = context.createDifferentiableFunction(
builder, loc, indices.parameters, original);
// Record the `differentiable_function` instruction.
// TODO(TF-689): Make `differentiable_function` store result indices and
// remove `ADContext::resultIndices`.
context.getResultIndices()[diffFuncInst] = activeResultIndices.front();
// Borrow the `differentiable_function`, extract and copy its JVP, then end
// the borrow and destroy the `differentiable_function` value.
auto borrowedADFunc =
builder.emitBeginBorrowOperation(loc, diffFuncInst);
auto extractedJVP = builder.createDifferentiableFunctionExtract(
loc, NormalDifferentiableFunctionTypeComponent::JVP,
jvpValue = builder.emitCopyValueOperation(loc, extractedJVP);
builder.emitEndBorrowOperation(loc, borrowedADFunc);
builder.emitDestroyValueOperation(loc, diffFuncInst);
// Call the JVP using the original parameters.
SmallVector<SILValue, 8> jvpArgs;
auto jvpFnTy = getOpType(jvpValue->getType()).castTo<SILFunctionType>();
auto numJVPArgs =
jvpFnTy->getNumParameters() + jvpFnTy->getNumIndirectFormalResults();
// Collect substituted arguments.
for (auto origArg : ai->getArguments())
assert(jvpArgs.size() == numJVPArgs);
// Apply the JVP.
// The JVP should be specialized, so no substitution map is necessary.
auto *jvpCall = getBuilder().createApply(loc, jvpValue, SubstitutionMap(),
jvpArgs, ai->isNonThrowing());
LLVM_DEBUG(getADDebugStream() << "Applied jvp function\n" << *jvpCall);
// Release the differentiable function.
builder.emitDestroyValueOperation(loc, jvpValue);
// Get the JVP results (original results and differential).
SmallVector<SILValue, 8> jvpDirectResults;
extractAllElements(jvpCall, builder, jvpDirectResults);
auto originalDirectResults =
auto originalDirectResult =
joinElements(originalDirectResults, getBuilder(), jvpCall->getLoc());
// From here on, uses of the original `apply` result are remapped to the
// original-result part of the JVP call.
mapValue(ai, originalDirectResult);
// Some instructions that produce the callee may have been cloned.
// If the original callee did not have any users beyond this `apply`,
// recursively kill the cloned callee.
if (auto *origCallee = cast_or_null<SingleValueInstruction>(
if (origCallee->hasOneUse())
// Add the differential function for when we create the struct we partially
// apply to the differential we are generating.
// The differential is the last direct result of the JVP call.
auto differential = jvpDirectResults.back();
auto *differentialDecl = differentialInfo.lookUpLinearMapDecl(ai);
auto originalDifferentialType =
auto differentialType =
auto jvpGenSig = SubsMap.getGenericSignature()
? SubsMap.getGenericSignature()->getCanonicalSignature()
: nullptr;
Lowering::GenericContextScope genericContextScope(
context.getTypeConverter(), jvpGenSig);
auto loweredDifferentialType =
// If actual differential type does not match lowered differential type,
// reabstract the differential using a thunk.
if (!loweredDifferentialType->isEqual(originalDifferentialType)) {
SILOptFunctionBuilder fb(context.getTransform());
auto *thunk = getOrCreateReabstractionThunk(
fb, context.getModule(), loc, &getDifferential(),
differentialType, loweredDifferentialType);
auto *thunkRef = builder.createFunctionRef(loc, thunk);
differential = builder.createPartialApply(
loc, thunkRef,
{differential}, differentialType->getCalleeConvention());
// Differential emission.
emitTangentForApplyInst(ai, indices, originalDifferentialType);
/// Handle the original `return`: build the differential struct value,
/// partially apply the differential function to it, and return the original
/// result elements joined with further direct results.
void visitReturnInst(ReturnInst *ri) {
auto loc = ri->getOperand().getLoc();
auto *origExit = ri->getParent();
auto &builder = getBuilder();
auto *diffStructVal = buildDifferentialValueStructValue(ri);
// Get the JVP value corresponding to the original function's return value.
auto *origRetInst = cast<ReturnInst>(origExit->getTerminator());
auto origResult = getOpValue(origRetInst->getOperand());
SmallVector<SILValue, 8> origResults;
extractAllElements(origResult, builder, origResults);
// Get and partially apply the differential.
auto jvpGenericEnv = jvp->getGenericEnvironment();
auto jvpSubstMap = jvpGenericEnv
? jvpGenericEnv->getForwardingSubstitutionMap()
: jvp->getForwardingSubstitutionMap();
auto *differentialRef =
builder.createFunctionRef(loc, &getDifferential());
// The differential struct value is the captured argument of the partial
// application.
auto *differentialPartialApply = builder.createPartialApply(
loc, differentialRef, jvpSubstMap, {diffStructVal},
// Return a tuple of the original result and differential.
SmallVector<SILValue, 8> directResults;
directResults.append(origResults.begin(), origResults.end());
ri->getLoc(), joinElements(directResults, builder, loc));
/// `br` is not supported by this emitter; reaching this visitor is treated as
/// unreachable.
void visitBranchInst(BranchInst *bi) {
llvm_unreachable("Unsupported SIL instruction.");
/// `cond_br` is not supported by this emitter; reaching this visitor is
/// treated as unreachable.
void visitCondBranchInst(CondBranchInst *cbi) {
llvm_unreachable("Unsupported SIL instruction.");
/// `switch_enum` is not supported by this emitter; reaching this visitor is
/// treated as unreachable.
void visitSwitchEnumInst(SwitchEnumInst *sei) {
llvm_unreachable("Unsupported SIL instruction.");
/// Handle `differentiable_function` instructions found in the original
/// function.
void visitDifferentiableFunctionInst(DifferentiableFunctionInst *dfi) {
// Clone `differentiable_function` from original to JVP, then add the cloned
// instruction to the `differentiable_function` worklist.
// The cloned instruction is retrieved through the value remapping of `dfi`.
auto *newDFI = cast<DifferentiableFunctionInst>(getOpValue(dfi));
} // end anonymous namespace
// PullbackEmitter - visitors on the original function for pullback code
// generation
namespace {
class PullbackEmitter final : public SILInstructionVisitor<PullbackEmitter> {
/// The parent VJP emitter.
VJPEmitter &vjpEmitter;
/// Dominance info for the original function.
DominanceInfo *domInfo = nullptr;
/// Post-dominance info for the original function.
PostDominanceInfo *postDomInfo = nullptr;
/// Post-order info for the original function.
PostOrderFunctionInfo *postOrderInfo = nullptr;
/// Mapping from original basic blocks to corresponding pullback basic blocks.
/// Pullback basic blocks always have the predecessor as the single argument.
DenseMap<SILBasicBlock *, SILBasicBlock *> pullbackBBMap;
/// Mapping from original basic blocks and original values to corresponding
/// adjoint values.
DenseMap<std::pair<SILBasicBlock *, SILValue>, AdjointValue> valueMap;
/// Mapping from original basic blocks and original buffers to corresponding
/// adjoint buffers.
DenseMap<std::pair<SILBasicBlock *, SILValue>, SILValue> bufferMap;
/// Mapping from pullback basic blocks to pullback struct arguments.
DenseMap<SILBasicBlock *, SILArgument *> pullbackStructArguments;
/// Mapping from pullback struct field declarations to pullback struct
/// elements destructured from the linear map basic block argument. In the
/// beginning of each pullback basic block, the block's pullback struct is
/// destructured into individual elements stored here.
DenseMap<VarDecl *, SILValue> pullbackStructElements;
/// Mapping from original basic blocks and successor basic blocks to
/// corresponding pullback trampoline basic blocks. Trampoline basic blocks
/// take additional arguments in addition to the predecessor enum argument.
DenseMap<std::pair<SILBasicBlock *, SILBasicBlock *>, SILBasicBlock *>
/// Mapping from original basic blocks to dominated active values.
DenseMap<SILBasicBlock *, SmallVector<SILValue, 8>> activeValues;
/// Mapping from original basic blocks and original active values to
/// corresponding pullback block arguments.
DenseMap<std::pair<SILBasicBlock *, SILValue>, SILArgument *>
/// Mapping from original basic blocks to local temporary values to be cleaned
/// up. This is populated when pullback emission is run on one basic block and
/// cleaned before processing another basic block.
DenseMap<SILBasicBlock *, SmallVector<SILValue, 64>>
/// Set of all values recorded as temporaries. `recordTemporary` uses it to
/// assert that a value is not recorded twice.
llvm::DenseSet<SILValue> blockTemporarySet;
/// The main builder.
SILBuilder builder;
/// An auxiliary local allocation builder.
SILBuilder localAllocBuilder;
/// Stack buffers allocated for storing local adjoint values.
SmallVector<SILValue, 64> functionLocalAllocations;
/// A set used to remember local allocations that were destroyed.
llvm::SmallDenseSet<SILValue> destroyedLocalAllocations;
/// The seed argument in the pullback function.
SILArgument *seed = nullptr;
/// Bump-pointer allocator owned by this emitter.
/// (presumably backs adjoint value storage — its uses are not visible here;
/// TODO confirm)
llvm::BumpPtrAllocator allocator;
/// Whether an error has occurred during pullback emission; starts out false.
bool errorOccurred = false;
/// Returns the AD context of the parent VJP emitter.
ADContext &getContext() const {
  return vjpEmitter.context;
}

/// Returns the SIL module of the AD context.
SILModule &getModule() const {
  return getContext().getModule();
}

/// Returns the AST context of the pullback function.
ASTContext &getASTContext() const {
  return getPullback().getASTContext();
}

/// Returns the original function being differentiated.
SILFunction &getOriginal() const {
  return *vjpEmitter.original;
}

/// Returns the pullback function being generated.
SILFunction &getPullback() const {
  return *vjpEmitter.pullback;
}

/// Returns the `[differentiable]` attribute held by the parent VJP emitter.
SILDifferentiableAttr *getAttr() const {
  return vjpEmitter.attr;
}

/// Returns a copy of the differentiation invoker of the parent VJP emitter.
DifferentiationInvoker getInvoker() const {
  return vjpEmitter.invoker;
}

/// Returns the pullback linear map info of the parent VJP emitter.
LinearMapInfo &getPullbackInfo() {
  return vjpEmitter.pullbackInfo;
}
/// Returns the differentiation indices, forwarded from the parent VJP emitter.
const SILAutoDiffIndices &getIndices() const {
return vjpEmitter.getIndices();
/// Returns the activity info held by the parent VJP emitter.
const DifferentiableActivityInfo &getActivityInfo() const {
return vjpEmitter.activityInfo;
/// Construct a pullback emitter for the given parent VJP emitter. Both
/// builders are created on the pullback function, and the dominance,
/// post-dominance and post-order info of the original function are fetched
/// from the pass manager.
explicit PullbackEmitter(VJPEmitter &vjpEmitter)
: vjpEmitter(vjpEmitter), builder(getPullback()),
localAllocBuilder(getPullback()) {
// Get dominance and post-order info for the original function.
auto &passManager = getContext().getPassManager();
auto *domAnalysis = passManager.getAnalysis<DominanceAnalysis>();
auto *postDomAnalysis = passManager.getAnalysis<PostDominanceAnalysis>();
auto *postOrderAnalysis = passManager.getAnalysis<PostOrderAnalysis>();
domInfo = domAnalysis->get(vjpEmitter.original);
postDomInfo = postDomAnalysis->get(vjpEmitter.original);
postOrderInfo = postOrderAnalysis->get(vjpEmitter.original);
// Pullback struct mapping
/// Register the destructured pullback struct element values for `origBB`.
/// `values` is paired positionally with the stored properties of the block's
/// pullback struct, and each pair is inserted into `pullbackStructElements`.
void initializePullbackStructElements(SILBasicBlock *origBB,
SILInstructionResultArray values) {
auto *pbStructDecl = getPullbackInfo().getLinearMapStruct(origBB);
assert(pbStructDecl->getStoredProperties().size() == values.size() &&
"The number of pullback struct fields must equal the number of "
"pullback struct element values");
for (auto pair : llvm::zip(pbStructDecl->getStoredProperties(), values)) {
// NOTE(review): the message says "must be @owned" while the condition only
// rules out `Guaranteed` ownership — confirm which one is intended.
std::get<1>(pair).getOwnershipKind() != ValueOwnershipKind::Guaranteed
&& "Pullback struct elements must be @owned");
auto insertion =
pullbackStructElements.insert({std::get<0>(pair), std::get<1>(pair)});
// Each field may be registered only once.
assert(insertion.second && "A pullback struct element already exists!");
/// Return the pullback struct element value registered for `field`. Asserts
/// that an element has been registered for the field.
SILValue getPullbackStructElement(SILBasicBlock *origBB, VarDecl *field) {
assert(getPullbackInfo().getLinearMapStruct(origBB) ==
assert(pullbackStructElements.count(field) &&
"Pullback struct element for this field does not exist!");
return pullbackStructElements.lookup(field);
// Adjoint value factory methods
// These are declarations only; the definitions are not part of this section
// of the file.
/// Make a zero adjoint value of the given type.
/// (presumably a symbolic zero that is materialized lazily — TODO confirm
/// against the definition)
AdjointValue makeZeroAdjointValue(SILType type);
/// Make an adjoint value that wraps the given concrete SIL value.
AdjointValue makeConcreteAdjointValue(SILValue value);
/// Make an aggregate adjoint value of the given type from a range of
/// elements.
template<typename EltRange>
AdjointValue makeAggregateAdjointValue(SILType type, EltRange elements);
// Temporary value management
/// Record a temporary value for cleanup before its block's terminator.
/// Returns `value` unchanged so that calls can be used inline.
SILValue recordTemporary(SILValue value) {
LLVM_DEBUG(getADDebugStream() << "Recorded temporary " << value);
// The `(void)` cast keeps `insertion` referenced when assertions are
// compiled out.
auto insertion = blockTemporarySet.insert(value); (void)insertion;
assert(insertion.second && "Temporary already recorded?");
return value;
/// Clean up all temporary values for the given block.
///
/// Emits a destroy-value operation at `loc` for every entry of
/// `blockTemporaries[bb]`.
// NOTE(review): nothing visible here removes the destroyed temporaries from
// `blockTemporarySet`, yet the debug-only leak check in `run()` reports any
// temporary still present in that set — confirm the erase happens.
void cleanUpTemporariesForBlock(SILBasicBlock *bb, SILLocation loc) {
LLVM_DEBUG(getADDebugStream() << "Cleaning up temporaries for bb"
<< bb->getDebugID() << '\n');
for (auto temp : blockTemporaries[bb]) {
builder.emitDestroyValueOperation(loc, temp);
// Symbolic value materializers
/// Materialize an adjoint value and return it as a SIL value. The type of the
/// given adjoint value must be loadable.
SILValue materializeAdjointDirect(AdjointValue val, SILLocation loc);
/// Materialize an adjoint value indirectly to a SIL buffer.
///
/// \param destBuffer the buffer that receives the materialized value.
void materializeAdjointIndirect(AdjointValue val, SILValue destBuffer,
SILLocation loc);
// Helpers for symbolic value materializers
/// Emit a zero value by calling `AdditiveArithmetic.zero`. The given type
/// must conform to `AdditiveArithmetic`.
///
/// \param bufferAccess the address that receives the zero value.
void emitZeroIndirect(CanType type, SILValue bufferAccess, SILLocation loc);
/// Emit a zero value by calling `AdditiveArithmetic.zero` and return it. The
/// given type must conform to `AdditiveArithmetic` and be loadable in SIL.
SILValue emitZeroDirect(CanType type, SILLocation loc);
// Accumulator
/// Materialize an adjoint value in the most efficient way.
SILValue materializeAdjoint(AdjointValue val, SILLocation loc);
/// Given two adjoint values, accumulate them and return the resulting adjoint
/// value.
AdjointValue accumulateAdjointsDirect(AdjointValue lhs, AdjointValue rhs,
SILLocation loc);
/// Given two materialized adjoint values, accumulate them. These two
/// adjoints must be objects of loadable type.
SILValue accumulateDirect(SILValue lhs, SILValue rhs, SILLocation loc);
/// Given two materialized adjoint values, accumulate them using
/// `AdditiveArithmetic.+`, depending on the differentiation mode.
///
/// This three-buffer overload takes a separate result buffer in addition to
/// the two operand buffers.
void accumulateIndirect(SILValue resultBufAccess,
SILValue lhsBufAccess, SILValue rhsBufAccess,
SILLocation loc);
/// Given two buffers of an `AdditiveArithmetic` type, accumulate the right
/// hand side into the left hand side using `+=`.
///
/// This two-buffer overload uses `lhsDestAccess` as both operand and
/// destination.
void accumulateIndirect(SILValue lhsDestAccess, SILValue rhsAccess,
SILLocation loc);
// Type transformer
/// Remap any archetypes into the current function's context.
///
/// A type that contains archetypes is first mapped out of its context and
/// then mapped into the pullback's context; any other type is mapped into the
/// pullback's context directly.
SILType remapType(SILType ty) {
if (ty.hasArchetype())
return getPullback().mapTypeIntoContext(ty.mapTypeOutOfContext());
return getPullback().mapTypeIntoContext(ty);
/// Returns the result of `getAutoDiffAssociatedTangentSpace` for `type`,
/// which is an optional `VectorSpace`.
// NOTE(review): the argument list of the call is cut off in this view.
Optional<VectorSpace> getTangentSpace(CanType type) {
return type->getAutoDiffAssociatedTangentSpace(
/// Assuming the given type conforms to `Differentiable` after remapping,
/// returns the associated tangent space type.
///
/// The result is built with `SILType::getPrimitiveType`.
// NOTE(review): the arguments to `getPrimitiveType` are cut off in this view.
SILType getRemappedTangentType(SILType type) {
return SILType::getPrimitiveType(
/// Substitutes all replacement types of the given substitution map using the
/// pullback function's substitution map.
///
/// The substitution applied is the pullback's forwarding substitution map.
SubstitutionMap remapSubstitutionMap(SubstitutionMap substMap) {
return substMap.subst(getPullback().getForwardingSubstitutionMap());
// Managed value mapping
/// Returns true if the original value has a corresponding adjoint value.
///
/// Adjoint values are keyed in `valueMap` by the (original block, original
/// value) pair. `origBB` must belong to the original function.
bool hasAdjointValue(SILBasicBlock *origBB, SILValue originalValue) const {
assert(origBB->getParent() == &getOriginal());
return valueMap.count({origBB, originalValue});
/// Sets the adjoint value corresponding to an original value in the given
/// original block.
///
/// The entry is stored in `valueMap` under the (original block, original
/// value) key. If an entry for that key already exists, it is overwritten
/// with `adjointValue`. Both `origBB` and `originalValue` must belong to the
/// original function.
// NOTE(review): the tangent-space assertion and the `try_emplace` / debug
// print statements are cut off in this view.
void setAdjointValue(SILBasicBlock *origBB, SILValue originalValue,
AdjointValue adjointValue) {
LLVM_DEBUG(getADDebugStream() << "Setting adjoint value for "
<< originalValue);
assert(origBB->getParent() == &getOriginal());
assert(originalValue->getFunction() == &getOriginal());
// The adjoint value must be in the tangent space.
assert(adjointValue.getType() ==
auto insertion = valueMap.try_emplace({origBB, originalValue},
<< "The existing adjoint value will be replaced: "
<< insertion.first->getSecond());
// `try_emplace` leaves an existing entry untouched, so replace it explicitly.
if (!insertion.second)
insertion.first->getSecond() = adjointValue;
/// Get the adjoint for an original value. The given value must be in the
/// original function.
/// This method first tries to find an entry in `valueMap`. If an adjoint
/// doesn't exist, a zero adjoint is inserted under that key and returned.
// NOTE(review): the argument to `makeZeroAdjointValue` is cut off in this
// view.
AdjointValue getAdjointValue(SILBasicBlock *origBB, SILValue originalValue) {
assert(origBB->getParent() == &getOriginal());
assert(originalValue->getFunction() == &getOriginal());
// `try_emplace` returns the existing entry when the key is already present.
auto insertion = valueMap.try_emplace(
{origBB, originalValue}, makeZeroAdjointValue(
auto it = insertion.first;
return it->getSecond();
/// Add an adjoint value for the given original value.
///
/// Attempts to insert `newAdjointValue` into `valueMap` under the (original
/// block, original value) key. When an entry already exists, the existing
/// adjoint and `newAdjointValue` are combined with `accumulateAdjointsDirect`
/// and the result is stored via `setAdjointValue`.
///
/// \param loc the location passed to `accumulateAdjointsDirect`.
// NOTE(review): the tangent-space assertion and the statement guarded by
// `if (inserted)` are cut off in this view.
void addAdjointValue(SILBasicBlock *origBB, SILValue originalValue,
AdjointValue newAdjointValue, SILLocation loc) {
assert(origBB->getParent() == &getOriginal());
assert(originalValue->getFunction() == &getOriginal());
LLVM_DEBUG(getADDebugStream() << "Adding adjoint for " << originalValue);
// The adjoint value must be in the tangent space.
assert(newAdjointValue.getType() ==
auto insertion =
valueMap.try_emplace({origBB, originalValue}, newAdjointValue);
auto inserted = insertion.second;
if (inserted)
// If adjoint already exists, accumulate the adjoint onto the existing
// adjoint.
auto it = insertion.first;
auto existingValue = it->getSecond();
auto adjVal = accumulateAdjointsDirect(existingValue, newAdjointValue, loc);
setAdjointValue(origBB, originalValue, adjVal);
/// Get the pullback block argument corresponding to the given original block
/// and active value.
///
/// The argument is read from `activeValuePullbackBBArgumentMap` and is
/// asserted to belong to the pullback block of `origBB`.
SILArgument *getActiveValuePullbackBlockArgument(SILBasicBlock *origBB,
SILValue activeValue) {
assert(origBB->getParent() == &getOriginal());
auto pullbackBBArg =
activeValuePullbackBBArgumentMap[{origBB, activeValue}];
// The assert dereferences the argument, so an entry is expected to exist.
assert(pullbackBBArg->getParent() == getPullbackBlock(origBB));
return pullbackBBArg;
// Buffer mapping
/// Registers `adjointBuffer` in `bufferMap` as the adjoint buffer of
/// `originalBuffer` in `origBB`.
///
/// Asserts that no buffer was previously registered for that key.
void setAdjointBuffer(SILBasicBlock *origBB,
SILValue originalBuffer,
SILValue adjointBuffer) {
auto insertion =
bufferMap.try_emplace({origBB, originalBuffer}, adjointBuffer);
// `(void)insertion` avoids an unused-variable warning when asserts are off.
assert(insertion.second); (void)insertion;
/// Returns a projection into an adjoint buffer that corresponds to the given
/// original projection, or a null `SILValue` when `originalProjection` is
/// none of the handled instruction kinds.
///
/// Handled kinds are `struct_element_addr`, `tuple_element_addr` and
/// `begin_access`. In each case the adjoint buffer of the instruction's
/// operand is obtained with `getAdjointBuffer`.
// NOTE(review): several statements (tangent-vector field lookup, the adjoint
// tuple index computation) are cut off in this view.
SILValue getAdjointProjection(SILBasicBlock *origBB,
SILValue originalProjection) {
// Handle `struct_element_addr`.
if (auto *seai = dyn_cast<StructElementAddrInst>(originalProjection)) {
auto adjSource = getAdjointBuffer(origBB, seai->getOperand());
auto *tangentVectorDecl =
auto tanFieldLookup =
// Exactly one tangent field is expected from the lookup.
assert(tanFieldLookup.size() == 1);
auto *tanField = cast<VarDecl>(tanFieldLookup.front());
return builder.createStructElementAddr(
seai->getLoc(), adjSource, tanField);
// Handle `tuple_element_addr`.
if (auto *teai = dyn_cast<TupleElementAddrInst>(originalProjection)) {
auto source = teai->getOperand();
auto adjSource = getAdjointBuffer(origBB, source);
// If the adjoint buffer is not of tuple type, it is returned as is.
if (!adjSource->getType().is<TupleType>())
return adjSource;
auto origTupleTy = source->getType().castTo<TupleType>();
// `adjIndex` is the element index used for the adjoint tuple projection.
unsigned adjIndex = 0;
for (unsigned i : range(teai->getFieldNo())) {
if (getTangentSpace(
return builder.createTupleElementAddr(
teai->getLoc(), adjSource, adjIndex);
// Handle `begin_access`.
if (auto *bai = dyn_cast<BeginAccessInst>(originalProjection)) {
auto adjBase = getAdjointBuffer(origBB, bai->getOperand());
// On error, record and return a null value for this projection.
if (errorOccurred)
return (bufferMap[{origBB, originalProjection}] = SILValue());
// Return the base buffer's adjoint buffer.
return adjBase;
return SILValue();
/// Returns the position at which the next function-local allocation should
/// be inserted: the start of the pullback entry block when
/// `functionLocalAllocations` is empty, otherwise the position of the
/// defining instruction of the most recently recorded allocation.
SILBasicBlock::iterator getNextFunctionLocalAllocationInsertionPoint() {
// If there are no local allocations, insert at the pullback entry start.
if (functionLocalAllocations.empty())
return getPullback().getEntryBlock()->begin();
// Otherwise, insert before the last local allocation. Inserting before
// rather than after ensures that allocation and zero initialization
// instructions are grouped together.
auto lastLocalAlloc = functionLocalAllocations.back();
return lastLocalAlloc->getDefiningInstruction()->getIterator();
/// Returns a reference to the `bufferMap` entry holding the adjoint buffer of
/// `originalBuffer` in `origBB`, creating the buffer on first request.
///
/// On a miss, the buffer is either a projection obtained from
/// `getAdjointProjection`, or a fresh `alloc_stack` of the remapped tangent
/// type that is zero-initialized via `emitZeroIndirect`.
// NOTE(review): the `try_emplace` call, the builder insertion-point changes
// and the registration of the new buffer are cut off in this view.
SILValue &getAdjointBuffer(SILBasicBlock *origBB, SILValue originalBuffer) {
assert(originalBuffer->getFunction() == &getOriginal());
auto insertion = bufferMap.try_emplace({origBB, originalBuffer},
// An existing entry is returned directly.
if (!insertion.second) // not inserted
return insertion.first->getSecond();
// If the original buffer is a projection, return a corresponding projection
// into the adjoint buffer.
if (auto adjProj = getAdjointProjection(origBB, originalBuffer))
return (bufferMap[{origBB, originalBuffer}] = adjProj);
// Set insertion point for local allocation builder: before the last local
// allocation, or at the start of the pullback function's entry if no local
// allocations exist yet.
// Allocate local buffer and initialize to zero.
auto bufObjectType = getRemappedTangentType(originalBuffer->getType());
auto *newBuf = localAllocBuilder.createAllocStack(
RegularLocation::getAutoGeneratedLocation(), bufObjectType);
// Temporarily change global builder insertion point and emit zero into the
// local buffer.
auto insertionPoint = builder.getInsertionBB();
emitZeroIndirect(bufObjectType.getASTType(), newBuf, newBuf->getLoc());
// Register the local buffer.
return (insertion.first->getSecond() = newBuf);
/// Accumulates `rhsBufferAccess` into the adjoint buffer corresponding to
/// `originalBuffer`.
///
/// `originalBuffer` must belong to the original function and
/// `rhsBufferAccess` to the pullback. The adjoint buffer is obtained with
/// `getAdjointBuffer` and updated with the two-buffer `accumulateIndirect`.
// NOTE(review): the message of the address-type assertion is cut off in this
// view.
void addToAdjointBuffer(SILBasicBlock *origBB, SILValue originalBuffer,
SILValue rhsBufferAccess, SILLocation loc) {
assert(originalBuffer->getType().isAddress() &&
assert(originalBuffer->getFunction() == &getOriginal());
assert(rhsBufferAccess->getFunction() == &getPullback());
auto adjointBuffer = getAdjointBuffer(origBB, originalBuffer);
accumulateIndirect(adjointBuffer, rhsBufferAccess, loc);
// CFG mapping
/// Returns the `pullbackBBMap` entry for the given original block.
SILBasicBlock *getPullbackBlock(SILBasicBlock *originalBlock) {
return pullbackBBMap.lookup(originalBlock);
/// Returns the `pullbackTrampolineBBMap` entry for the edge from
/// `originalBlock` to `successorBlock`. Callers such as
/// `buildPullbackSuccessor` treat a null result as "no trampoline block".
SILBasicBlock *getPullbackTrampolineBlock(
SILBasicBlock *originalBlock, SILBasicBlock *successorBlock) {
return pullbackTrampolineBBMap.lookup({originalBlock, successorBlock});
// Entry point
/// Performs pullback generation on the empty pullback function. Returns true
/// if any error occurs.
///
/// Visible phases, in order:
///   1. Find the original return block and the original result selected by
///      the source index; return early when that result is not varied.
///   2. Compute the active values of each original block in dominance order.
///   3. Create pullback blocks, their arguments and trampoline blocks.
///   4. Seed the adjoint of the original result from the pullback's first
///      parameter.
///   5. Differentiate each original block, then emit the pullback return,
///      cleanups and deallocations in the pullback exit block.
///   6. In debug builds, check for leaked temporaries and local buffers.
// NOTE(review): many statements and all closing braces of this function are
// missing in this view; the phase summary covers only what is visible.
bool run() {
auto &original = getOriginal();
auto &pullback = getPullback();
auto pbLoc = getPullback().getLocation();
LLVM_DEBUG(getADDebugStream() << "Running PullbackEmitter on\n"
<< original);
// Enter a generic context scope for the pullback's canonical generic
// signature (null when the pullback has no generic environment).
auto *pbGenEnv = getPullback().getGenericEnvironment();
auto pbGenSig = pbGenEnv
? pbGenEnv->getGenericSignature()->getCanonicalSignature()
: nullptr;
Lowering::GenericContextScope genericContextScope(
getContext().getTypeConverter(), pbGenSig);
auto origExitIt = original.findReturnBB();
assert(origExitIt != original.end() &&
"Functions without returns must have been diagnosed");
auto *origExit = &*origExitIt;
// Select the original result being differentiated by the source index.
SmallVector<SILValue, 8> origFormalResults;
collectAllFormalResultsInTypeOrder(original, origFormalResults);
auto origResult = origFormalResults[getIndices().source];
// If original result is non-varied, it will always have a zero derivative.
// Skip full pullback generation and simply emit zero derivatives for wrt
// parameters.
// NOTE(TF-876): This shortcut is currently necessary for functions
// returning non-varied result with >1 basic block where some basic blocks
// have no dominated active values; control flow differentiation does not
// handle this case. See TF-876 for context.
if (!getActivityInfo().isVaried(origResult, getIndices().parameters)) {
return false;
// Get dominated active values in original blocks.
// Adjoint values of dominated active values are passed as pullback block
// arguments.
DominanceOrder domOrder(original.getEntryBlock(), domInfo);
while (auto *bb = domOrder.getNext()) {
auto &bbActiveValues = activeValues[bb];
// If the current block has an immediate dominator, append the immediate
// dominator block's active values to the current block's active values.
if (auto *domNode = domInfo->getNode(bb)->getIDom()) {
auto &domBBActiveValues = activeValues[domNode->getBlock()];
SmallPtrSet<SILValue, 8> visited(bbActiveValues.begin(),
// Register a value as active if it has not yet been visited.
auto addActiveValue = [&](SILValue v) {
if (visited.count(v))
// Diagnose active enum values. Differentiation of enum values is not
// yet supported; requires special adjoint value handling.
if (v->getType().getEnumOrBoundGenericEnum()) {
v, getInvoker(), diag::autodiff_enums_unsupported);
errorOccurred = true;
// Skip address projections.
// Address projections do not need their own adjoint buffers; they
// become projections into their adjoint base buffer.
if (Projection::isAddressProjection(v))
// Register bb arguments and all instruction operands/results.
for (auto *arg : bb->getArguments())
if (getActivityInfo().isActive(arg, getIndices()))
for (auto &inst : *bb) {
for (auto op : inst.getOperandValues())
if (getActivityInfo().isActive(op, getIndices()))
for (auto result : inst.getResults())
if (getActivityInfo().isActive(result, getIndices()))
if (errorOccurred)
return true;
// Create pullback blocks and arguments, visiting original blocks in
// post-order post-dominance order.
SmallVector<SILBasicBlock *, 8> postOrderPostDomOrder;
// Start from the root node, which may have a marker `nullptr` block if
// there are multiple roots.
PostOrderPostDominanceOrder postDomOrder(postDomInfo->getRootNode(),
postOrderInfo, original.size());
while (auto *origNode = postDomOrder.getNext()) {
auto *origBB = origNode->getBlock();
// If node is the `nullptr` marker basic block, do not push it.
if (!origBB)
for (auto *origBB : postOrderPostDomOrder) {
auto *pullbackBB = pullback.createBasicBlock();
pullbackBBMap.insert({origBB, pullbackBB});
auto pbStructLoweredType =
// If the BB is the original exit, then the pullback block that we just
// created must be the pullback function's entry. For the pullback entry,
// create entry arguments and continue to the next block.
if (origBB == origExit) {
auto *mainPullbackStruct = pullbackBB->getArguments().back();
assert(mainPullbackStruct->getType() == pbStructLoweredType);
pullbackStructArguments[origBB] = mainPullbackStruct;
// Destructure the pullback struct to get the elements.
auto *dsi = builder.createDestructureStruct(pbLoc, mainPullbackStruct);
initializePullbackStructElements(origBB, dsi->getResults());
// Get all active values in the original block.
// If the original block has no active values, continue.
auto &bbActiveValues = activeValues[origBB];
if (bbActiveValues.empty())
// Otherwise, if the original block has active values:
// - For each active buffer in the original block, allocate a new local
// buffer in the pullback entry. (All adjoint buffers are allocated in
// the pullback entry and deallocated in the pullback exit.)
// - For each active value in the original block, add adjoint value
// arguments to the pullback block.
for (auto activeValue : bbActiveValues) {
if (activeValue->getType().isAddress()) {
// Allocate and zero initialize a new local buffer using
// `getAdjointBuffer`.
getAdjointBuffer(origBB, activeValue);
} else {
// Create and register pullback block argument for the active value.
auto *pullbackArg = pullbackBB->createPhiArgument(
activeValuePullbackBBArgumentMap[{origBB, activeValue}] = pullbackArg;
// Add a pullback struct argument.
auto *pbStructArg = pullbackBB->createPhiArgument(
pbStructLoweredType, ValueOwnershipKind::Owned);
pullbackStructArguments[origBB] = pbStructArg;
// Destructure the pullback struct to get the elements.
auto *dsi = builder.createDestructureStruct(pbLoc, pbStructArg);
initializePullbackStructElements(origBB, dsi->getResults());
// - Create pullback trampoline blocks for each successor block of the
// original block. Pullback trampoline blocks only have a pullback
// struct argument. They branch from a pullback successor block to the
// pullback original block, passing adjoint values of active values.
for (auto *succBB : origBB->getSuccessorBlocks()) {
auto *pullbackTrampolineBB =
pullbackTrampolineBBMap.insert({{origBB, succBB},
// Get the enum element type (i.e. the pullback struct type). The enum
// element type may be boxed if the enum is indirect.
auto enumLoweredTy =
auto *enumEltDecl =
getPullbackInfo().lookUpBranchingTraceEnumElement(origBB, succBB);
auto enumEltType = remapType(
enumLoweredTy.getEnumElementType(enumEltDecl, getModule()));
auto *pullbackEntry = pullback.getEntryBlock();
// The pullback function has type (seed, exit_pbs) -> ([arg0], ..., [argn]).
auto pbParamArgs = pullback.getArgumentsWithoutIndirectResults();
assert(pbParamArgs.size() == 2);
seed = pbParamArgs[0];
// Assign adjoint for original result.
pullbackEntry, getNextFunctionLocalAllocationInsertionPoint());
// An address-typed seed is copied into a fresh stack buffer that becomes the
// adjoint buffer of the original result; an object-typed seed is registered
// directly as a concrete adjoint value.
if (seed->getType().isAddress()) {
auto *seedBufCopy = builder.createAllocStack(pbLoc, seed->getType());
builder.createCopyAddr(pbLoc, seed, seedBufCopy, IsNotTake,
setAdjointBuffer(origExit, origResult, seedBufCopy);
<< "Assigned seed buffer " << seedBufCopy
<< " as the adjoint of original indirect result "
<< origResult);
} else {
setAdjointValue(origExit, origResult, makeConcreteAdjointValue(seed));
<< "Assigned seed " << *seed
<< " as the adjoint of original result " << origResult);
// Visit original blocks in post-order and perform differentiation
// in corresponding pullback blocks. If errors occurred, back out.
for (auto *bb : postOrderPostDomOrder) {
if (errorOccurred)
return true;
// Prepare and emit a `return` in the pullback exit block.
auto *origEntry = getOriginal().getEntryBlock();
auto *pbExit = getPullbackBlock(origEntry);
// This vector will contain all the materialized return elements.
SmallVector<SILValue, 8> retElts;
// This vector will contain all indirect parameter adjoint buffers.
SmallVector<SILValue, 4> indParamAdjoints;
auto origParams = getOriginal().getArgumentsWithoutIndirectResults();
// Materializes the return element corresponding to the parameter
// `parameterIndex` into the `retElts` vector.
auto addRetElt = [&](unsigned parameterIndex) -> void {
auto origParam = origParams[parameterIndex];
if (origParam->getType().isObject()) {
auto pbVal = getAdjointValue(origEntry, origParam);
auto val = materializeAdjointDirect(pbVal, pbLoc);
auto newVal = builder.emitCopyValueOperation(pbLoc, val);
} else {
auto adjBuf = getAdjointBuffer(origEntry, origParam);
// Collect differentiation parameter adjoints.
for (auto i : getIndices().parameters->getIndices())
// Copy them to adjoint indirect results.
assert(indParamAdjoints.size() ==
getPullback().getIndirectResults().size() &&
"Indirect parameter adjoint count mismatch");
for (auto pair : zip(indParamAdjoints,
getPullback().getIndirectResults())) {
auto source = std::get<0>(pair);
auto *dest = std::get<1>(pair);
builder.createCopyAddr(pbLoc, source, dest, IsTake, IsInitialization);
// Prevent source buffer from being deallocated, since the underlying
// value is moved.
// Emit cleanups for all local values.
cleanUpTemporariesForBlock(pbExit, pbLoc);
// Deallocate local allocations.
for (auto alloc : functionLocalAllocations) {
// Assert that local allocations have at least one use.
// Buffers should not be allocated needlessly.
if (!destroyedLocalAllocations.count(alloc)) {
builder.emitDestroyAddrAndFold(pbLoc, alloc);
builder.createDeallocStack(pbLoc, alloc);
builder.createReturn(pbLoc, joinElements(retElts, builder, pbLoc));
// Debug-only leak checks: any temporary still in `blockTemporarySet`, or any
// local allocation not in `destroyedLocalAllocations`, is reported.
#ifndef NDEBUG
bool leakFound = false;
// Ensure all temporaries have been cleaned up.
for (auto &bb : pullback) {
for (auto temp : blockTemporaries[&bb]) {
if (blockTemporarySet.count(temp)) {
leakFound = true;
getADDebugStream() << "Found leaked temporary:\n" << temp;
// Ensure all local allocations have been cleaned up.
for (auto localAlloc : functionLocalAllocations) {
if (!destroyedLocalAllocations.count(localAlloc)) {
leakFound = true;
getADDebugStream() << "Found leaked local buffer:\n" << localAlloc;
assert(!leakFound && "Leaks found!");
LLVM_DEBUG(getADDebugStream() << "Generated pullback for "
<< original.getName() << ":\n" << pullback);
return errorOccurred;
/// If original result is non-varied, it will always have a zero derivative.
/// Skip full pullback generation and simply emit zero derivatives for wrt
/// parameters.
///
/// Creates the pullback entry block, destroys its owned arguments, emits a
/// zero for every result of the pullback's lowered function type (directly
/// or into the matching indirect result buffer) and emits the `return`.
///
/// \param origNonvariedResult the non-varied original result; its source
///        location is used for the fix-it diagnostic.
// NOTE(review): the computation of `resultType` and the `else` pairing the
// direct/indirect branches are cut off in this view.
void emitZeroDerivativesForNonvariedResult(SILValue origNonvariedResult) {
auto &pullback = getPullback();
auto pbLoc = getPullback().getLocation();
// TODO(TF-788): Re-enable non-varied result warning.
// Emit fixit if original non-varied result has a valid source location.
auto startLoc = origNonvariedResult.getLoc().getStartSourceLoc();
auto endLoc = origNonvariedResult.getLoc().getEndSourceLoc();
if (startLoc.isValid() && endLoc.isValid()) {
// The fix-it wraps the expression as `withoutDerivative(at: ...)`.
getContext().diagnose(startLoc, diag::autodiff_nonvaried_result_fixit)
.fixItInsert(startLoc, "withoutDerivative(at:")
.fixItInsertAfter(endLoc, ")");
LLVM_DEBUG(getADDebugStream() << getOriginal().getName()
<< " has non-varied result, returning zero"
" for all pullback results\n");
auto *pullbackEntry = pullback.createBasicBlock();
// Destroy all owned arguments.
for (auto *arg : pullbackEntry->getArguments())
if (arg->getOwnershipKind() == ValueOwnershipKind::Owned)
builder.emitDestroyOperation(pbLoc, arg);
// Return zero for each result.
SmallVector<SILValue, 4> directResults;
// Indirect results are consumed in order as indirect result infos are met.
auto indirectResultIt = pullback.getIndirectResults().begin();
for (auto resultInfo : pullback.getLoweredFunctionType()->getResults()) {
auto resultType =
if (resultInfo.isFormalDirect())
directResults.push_back(emitZeroDirect(resultType, pbLoc));
emitZeroIndirect(resultType, *indirectResultIt++, pbLoc);
builder.createReturn(pbLoc, joinElements(directResults, builder, pbLoc));
LLVM_DEBUG(getADDebugStream() << "Generated pullback for "
<< getOriginal().getName() << ":\n"
<< pullback);
using TrampolineBlockSet = SmallPtrSet<SILBasicBlock *, 4>;
/// Determine the pullback successor block for a given original block and one
/// of its predecessors. When a trampoline block is necessary, emit code into
/// the trampoline block to trampoline the original block's active value's
/// adjoint values. The dense map `pullbackTrampolineBlockMap` will be
/// populated to keep track of which pullback successor blocks each active
/// value's adjoint value is used, so that we can release those values in
/// pullback successor blocks that are not using them.
///
/// \param origBB the original block whose pullback block is being terminated.
/// \param origPredBB a predecessor of `origBB` in the original function.
/// \returns the pullback block of `origPredBB` when no trampoline block is
///          registered for the edge, otherwise the trampoline block.
// NOTE(review): several statements (debug-print openers, the `copy_addr`
// emission, the box-type computation and the branch arguments) are cut off
// in this view.
SILBasicBlock *buildPullbackSuccessor(
SILBasicBlock *origBB, SILBasicBlock *origPredBB,
SmallDenseMap<SILValue, TrampolineBlockSet> &pullbackTrampolineBlockMap) {
// Get the pullback block and optional pullback trampoline block of the
// predecessor block.
auto *pullbackBB = getPullbackBlock(origPredBB);
auto *pullbackTrampolineBB = getPullbackTrampolineBlock(origPredBB, origBB);
// If the predecessor block does not have a corresponding pullback
// trampoline block, then the pullback successor is the pullback block.
if (!pullbackTrampolineBB)
return pullbackBB;
// Otherwise, the pullback successor is the pullback trampoline block,
// which branches to the pullback block and propagates adjoint values of
// active values.
assert(pullbackTrampolineBB->getNumArguments() == 1);
auto loc = origBB->getParent()->getLocation();
SmallVector<SILValue, 8> trampolineArguments;
// Propagate adjoint values/buffers of active values/buffers to
// predecessor blocks.
auto &predBBActiveValues = activeValues[origPredBB];
for (auto activeValue : predBBActiveValues) {
<< "Propagating active adjoint " << activeValue
<< " to predecessors' pullback blocks\n");
if (activeValue->getType().isObject()) {
auto activeValueAdj = getAdjointValue(origBB, activeValue);
auto concreteActiveValueAdj =
materializeAdjointDirect(activeValueAdj, loc);
// The first time a materialized adjoint is seen, copy it before recording
// it in the map.
if (!pullbackTrampolineBlockMap.count(concreteActiveValueAdj)) {
concreteActiveValueAdj =
builder.emitCopyValueOperation(loc, concreteActiveValueAdj);
setAdjointValue(origBB, activeValue,
auto insertion = pullbackTrampolineBlockMap.try_emplace(
concreteActiveValueAdj, TrampolineBlockSet());
auto &blockSet = insertion.first->getSecond();
// If the pullback block does not yet have a registered adjoint
// value for the active value, set the adjoint value to the
// forwarded adjoint value argument.
// TODO: Hoist this logic out of loop over predecessor blocks to
// remove the `hasAdjointValue` check.
if (!hasAdjointValue(origPredBB, activeValue)) {
auto *pullbackBBArg =
getActiveValuePullbackBlockArgument(origPredBB, activeValue);
auto forwardedArgAdj = makeConcreteAdjointValue(pullbackBBArg);
setAdjointValue(origPredBB, activeValue, forwardedArgAdj);
} else {
// Propagate adjoint buffers using `copy_addr`.
auto adjBuf = getAdjointBuffer(origBB, activeValue);
auto predAdjBuf = getAdjointBuffer(origPredBB, activeValue);
loc, adjBuf, predAdjBuf, IsNotTake, IsNotInitialization);
// Propagate pullback struct argument.
SILBuilder pullbackTrampolineBBBuilder(pullbackTrampolineBB);
auto *predPBStructVal = pullbackTrampolineBB->getArguments().front();
auto boxType =
if (!boxType) {
} else {
// Boxed case: project the box and load a copy of its contents.
auto *projectBox = pullbackTrampolineBBBuilder.createProjectBox(
loc, predPBStructVal, /*index*/ 0);
auto loaded = pullbackTrampolineBBBuilder.emitLoadValueOperation(
loc, projectBox, LoadOwnershipQualifier::Copy);
// Branch from pullback trampoline block to pullback block.
pullbackTrampolineBBBuilder.createBranch(loc, pullbackBB,
return pullbackTrampolineBB;
/// Emit pullback code in the corresponding pullback block.
///
/// Visits the instructions of `bb` in reverse order, differentiating those
/// for which `shouldDifferentiateInstruction` returns true, then terminates
/// the pullback block: adjoints of active block arguments are forwarded to
/// the predecessors' incoming values, one pullback successor is built per
/// predecessor, and a terminator switching on the predecessor enum value is
/// emitted. The pullback block of the original entry is left unterminated.
// NOTE(review): many statements (early exits, the instruction visit call,
// collection of incoming values, the `switch_enum` creation opener) are cut
// off in this view.
void visitSILBasicBlock(SILBasicBlock *bb) {
auto pbLoc = getPullback().getLocation();
// Get the corresponding pullback basic block.
auto *pbBB = getPullbackBlock(bb);
// Debug dump: one line per instruction, marked with whether it will be
// differentiated.
auto &s = getADDebugStream()
<< "Original bb" + std::to_string(bb->getDebugID())
<< ": To differentiate or not to differentiate?\n";
for (auto &inst : llvm::reverse(*bb)) {
s << (getPullbackInfo().shouldDifferentiateInstruction(&inst)
? "[∂] " : "[ ] ")
<< inst;
// Visit each instruction in reverse order.
for (auto &inst : llvm::reverse(*bb)) {
if (!getPullbackInfo().shouldDifferentiateInstruction(&inst))
// Differentiate instruction.
if (errorOccurred)
// Emit a branching terminator for the block.
// If the original block is the original entry, then the pullback block is
// the pullback exit. This is handled specially in `PullbackEmitter::run()`,
// so we leave the block non-terminated.
if (bb->isEntry())
// Otherwise, add a `switch_enum` terminator for non-exit
// pullback blocks.
// 1. Get the pullback struct pullback block argument.
// 2. Extract the predecessor enum value from the pullback struct value.
auto *predEnum = getPullbackInfo().getBranchingTraceDecl(bb);
auto *predEnumField =
auto predEnumVal = getPullbackStructElement(bb, predEnumField);
// Propagate adjoint values from active basic block arguments to
// predecessor terminator operands.
for (auto *bbArg : bb->getArguments()) {
if (!getActivityInfo().isActive(bbArg, getIndices()))
// Get predecessor terminator operands.
SmallVector<std::pair<SILBasicBlock *, SILValue>, 4> incomingValues;
// Initialize adjoint value of predecessor terminator operands as
// adjoint value of current block arguments.
auto bbArgAdj = getAdjointValue(bb, bbArg);
for (auto pair : incomingValues) {
auto *predBB = std::get<0>(pair);
auto incomingValue = std::get<1>(pair);
setAdjointValue(predBB, incomingValue, bbArgAdj);
// 3. Build the pullback successor cases for the `switch_enum`
// instruction. The pullback successors correspond to the predecessors
// of the current block.
SmallVector<std::pair<EnumElementDecl *, SILBasicBlock *>, 4>
// A map from active values' adjoint values to the trampoline blocks that
// are using them.
SmallDenseMap<SILValue, TrampolineBlockSet> pullbackTrampolineBlockMap;
SmallVector<SILBasicBlock *, 8> pullbackSuccBBs;
for (auto *predBB : bb->getPredecessorBlocks()) {
auto *pullbackSuccBB = buildPullbackSuccessor(bb, predBB,
auto *enumEltDecl =
getPullbackInfo().lookUpBranchingTraceEnumElement(predBB, bb);
pullbackSuccessorCases.push_back({enumEltDecl, pullbackSuccBB});
// Values are trampolined by only a subset of pullback successor blocks.
// Other successors blocks should destroy the value to balance the reference
// count.
for (auto pair : pullbackTrampolineBlockMap) {
auto value = pair.getFirst();
// The set of trampoline BBs that are users of `value`.
auto &userTrampolineBBSet = pair.getSecond();
// For each pullback successor block that does not trampoline the value,
// release the value.
for (auto *pullbackSuccBB : pullbackSuccBBs) {
if (userTrampolineBBSet.count(pullbackSuccBB))
// A local builder positioned at the start of the successor block; it
// shadows the member `builder` inside this loop.
SILBuilder builder(pullbackSuccBB->begin());
builder.emitDestroyValueOperation(pbLoc, value);
// Emit cleanups for all block-local temporaries.
cleanUpTemporariesForBlock(pbBB, pbLoc);
// - If the original block has exactly one predecessor, then the pullback
// block has exactly one successor. Extract the pullback struct value
// from the predecessor enum value using `unchecked_take_enum_data_addr`
// and `load [take]`, and branch to the pullback successor block.
// Every element of the predecessor enum must have a successor case.
assert(pullbackSuccessorCases.size() == predEnum->getNumElements());
pbLoc, predEnumVal, /*DefaultBB*/ nullptr, pullbackSuccessorCases);
/// Visits a single original instruction. In debug builds, the instructions
/// located between the builder's insertion point before and after the visit
/// are printed to `llvm::dbgs()`.
// NOTE(review): the early exit after `if (errorOccurred)`, the debug-print
// opener, the dispatch call and the closing `#endif` are cut off in this
// view.
void visit(SILInstruction *inst) {
if (errorOccurred)
<< "PullbackEmitter visited:\n[ORIG]" << *inst);
#ifndef NDEBUG
// Remember the instruction just before the insertion point so that newly
// emitted instructions can be enumerated afterwards.
auto beforeInsertion = std::prev(builder.getInsertionPoint());
auto &s = llvm::dbgs() << "[ADJ] Emitted in pullback:\n";
auto afterInsertion = builder.getInsertionPoint();
for (auto it = ++beforeInsertion; it != afterInsertion; ++it)
s << *it;
/// Handler for instructions with no dedicated pullback rule: emits a
/// non-differentiability error for `inst` and sets `errorOccurred`.
// NOTE(review): the debug-print opener and the remaining arguments of
// `emitNondifferentiabilityError` are cut off in this view.
void visitSILInstruction(SILInstruction *inst) {
<< "Unhandled instruction in PullbackEmitter: " << *inst);
getContext().emitNondifferentiabilityError(inst, getInvoker(),
errorOccurred = true;
/// Emits a call to `fnRef` that writes one element of `adjointArray` into a
/// freshly allocated stack buffer of type `eltType`, and returns that buffer.
///
/// The call's arguments are, in order, the new buffer, an integer struct
/// wrapping a 64-bit builtin integer literal for `index`, and `adjointArray`.
/// The substitution map is built from `genericSig`, the element's AST type
/// and its `AdditiveArithmetic` and `Differentiable` conformances.
///
/// No `dealloc_stack` is emitted here; the
/// `accumulateArrayTangentSubscript*` helpers emit it for the buffer they
/// receive.
///
/// \param ai the apply instruction whose location is used for all emitted
///        instructions.
// NOTE(review): the argument of `getPrimitiveObjectType` (the integer struct
// type) is cut off in this view.
AllocStackInst *
emitArrayTangentSubscript(ApplyInst *ai, SILType eltType,
SILValue adjointArray, SILValue fnRef,
CanGenericSignature genericSig, int index) {
auto &ctx = builder.getASTContext();
auto astType = eltType.getASTType();
// Build the index operand: a 64-bit builtin literal wrapped in a struct.
auto literal = builder.createIntegerLiteral(
ai->getLoc(), SILType::getBuiltinIntegerType(64, ctx), index);
auto intType = SILType::getPrimitiveObjectType(
auto intStruct = builder.createStruct(ai->getLoc(), intType, {literal});
AllocStackInst *subscriptBuffer =
builder.createAllocStack(ai->getLoc(), eltType);
// Look up the conformances needed for the substitution map.
auto swiftModule = getModule().getSwiftModule();
auto diffProto = ctx.getProtocol(KnownProtocolKind::Differentiable);
auto diffConf = swiftModule->lookupConformance(astType, diffProto);
assert(diffConf.hasValue() && "Missing conformance to `Differentiable`");
auto addArithProto = ctx.getProtocol(KnownProtocolKind::AdditiveArithmetic);
auto addArithConf = swiftModule->lookupConformance(astType, addArithProto);
assert(addArithConf.hasValue() &&
"Missing conformance to `AdditiveArithmetic`");
auto subMap =
SubstitutionMap::get(genericSig, {astType}, {*addArithConf, *diffConf});
builder.createApply(ai->getLoc(), fnRef, subMap,
{subscriptBuffer, intStruct, adjointArray});
return subscriptBuffer;
/// Loads the element adjoint out of `subscriptBuffer` with `[take]`, adds it
/// to the adjoint of the value stored by `si`, and deallocates the buffer.
///
/// \param ai the array-initialization apply; its location is used for the
///        load and the `dealloc_stack`.
/// \param eltType not referenced by the visible body.
/// \param si the original `store` whose source receives the adjoint.
/// \param subscriptBuffer the stack buffer holding the element adjoint.
void accumulateArrayTangentSubscriptDirect(ApplyInst *ai, SILType eltType,
StoreInst *si,
AllocStackInst *subscriptBuffer) {
auto newAdjValue = builder.emitLoadValueOperation(
ai->getLoc(), subscriptBuffer, LoadOwnershipQualifier::Take);
SILValue src = si->getSrc();
// When the store's source is a `copy_value`, the `copy_value` is part of
// array literal initialization. In this case, add the adjoint to the source
// of the copy directly.
if (auto *cvi = dyn_cast<CopyValueInst>(src))
src = cvi->getOperand();
addAdjointValue(si->getParent(), src,
makeConcreteAdjointValue(newAdjValue), si->getLoc());
builder.createDeallocStack(ai->getLoc(), subscriptBuffer);
/// Accumulates the element adjoint held in `subscriptBuffer` into the adjoint
/// buffer of the source of `cai`, then destroys and deallocates
/// `subscriptBuffer`.
///
/// \param ai the array-initialization apply; its location is used for the
///        `dealloc_stack`.
/// \param cai the original `copy_addr` whose source receives the adjoint.
// NOTE(review): the final argument of the `addToAdjointBuffer` call is cut
// off in this view.
void accumulateArrayTangentSubscriptIndirect(
ApplyInst *ai, CopyAddrInst *cai, AllocStackInst *subscriptBuffer) {
addToAdjointBuffer(cai->getParent(), cai->getSrc(), subscriptBuffer,
builder.emitDestroyAddrAndFold(cai->getLoc(), subscriptBuffer);
builder.createDeallocStack(ai->getLoc(), subscriptBuffer);
// Pullback handling for an array-literal initialization `apply` (called from
// `visitApplyInst` when `isArrayLiteralIntrinsic(ai)` holds).
//
// Pass 1 locates the adjoint of the resulting array and builds a
// `function_ref` to the subscript getter found on the adjoint array's type.
// Pass 2 walks the element initializations (`store` / `copy_addr`, either on
// the base address or behind an `index_addr`) and, for each one, emits an
// array-tangent subscript at the element's index and accumulates it into the
// adjoint of the stored value.
void visitArrayInitialization(ApplyInst *ai) {
LLVM_DEBUG(getADDebugStream() << "Visiting array initialization:\n" << *ai);
SILValue adjointArray;
SILValue fnRef;
CanGenericSignature genericSig;
// Pass 1: find the array adjoint and set up the subscript getter reference.
for (auto use : ai->getUses()) {
auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
if (!dti) continue;
// The first tuple field of the return value is the `Array`.
adjointArray = getAdjointValue(ai->getParent(), dti->getResult(0))
assert(adjointArray && "Array does not have adjoint value");
auto astType = adjointArray->getType().getASTType();
auto typeDecl = astType->getStructOrBoundGenericStruct();
// NOTE(review): the arguments of this `lookupDirect` call are not visible in
// this view.
auto subscriptDecl = cast<SubscriptDecl>(typeDecl->lookupDirect(
auto subscriptGet = subscriptDecl->getAccessor(AccessorKind::Get);
SILDeclRef subscriptRef(subscriptGet, SILDeclRef::Kind::Func);
auto fnBuilder = SILOptFunctionBuilder(getContext().getTransform());
// Only a declaration of the getter is requested (`NotForDefinition`).
auto fn = fnBuilder.getOrCreateFunction(
ai->getLoc(), subscriptRef, NotForDefinition);
genericSig = fn->getLoweredFunctionType()->getGenericSignature();
fnRef = builder.createFunctionRef(ai->getLoc(), fn);
// All three values must have been produced by pass 1 before continuing.
assert(adjointArray && "Array does not have adjoint value");
assert(genericSig && "No generic signature");
assert(fnRef && "Could not create `function_ref`");
// Two loops because the `tuple_extract` instructions can be reached in
// either order.
for (auto use : ai->getUses()) {
auto *dti = dyn_cast<DestructureTupleInst>(use->getUser());
if (!dti) continue;
// The second tuple field is the `RawPointer`.
// NOTE: each nested loop below declares its own `use`, shadowing the outer
// loop variable of the same name.
for (auto use : dti->getResult(1)->getUses()) {
// The `RawPointer` passes through a `pointer_to_address`. That
// instruction's first use is a `store` whose src is useful; its
// subsequent uses are `index_addr`s whose only use is a useful
// `store`. In the indirect case, each `store` is instead a
// `copy_addr`.
for (auto use : use->getUser()->getResult(0)->getUses()) {
auto inst = use->getUser();
// Element 0 is initialized directly through the base address, hence the
// constant index 0 in the two branches below.
if (auto si = dyn_cast<StoreInst>(inst)) {
auto tanType = getRemappedTangentType(si->getSrc()->getType());
auto subscriptBuffer = emitArrayTangentSubscript(
ai, tanType, adjointArray, fnRef, genericSig, 0);
ai, tanType, si, subscriptBuffer);
} else if (auto cai = dyn_cast<CopyAddrInst>(inst)) {
auto tanType = getRemappedTangentType(cai->getSrc()->getType());
auto subscriptBuffer = emitArrayTangentSubscript(
ai, tanType, adjointArray, fnRef, genericSig, 0);
ai, cai, subscriptBuffer);
} else if (auto iai = dyn_cast<IndexAddrInst>(inst)) {
// Remaining elements: the element index is read from the `index_addr`'s
// index operand.
for (auto use : iai->getUses()) {
if (auto si = dyn_cast<StoreInst>(use->getUser())) {
// NOTE(review): `literal` comes from `dyn_cast` but is dereferenced
// below without a null check; if the index is guaranteed to be an
// `integer_literal`, `cast<>` would state that invariant.
auto literal = dyn_cast<IntegerLiteralInst>(iai->getIndex());
auto tanType = getRemappedTangentType(
auto subscriptBuffer = emitArrayTangentSubscript(
ai, tanType, adjointArray, fnRef,
genericSig, literal->getValue().getLimitedValue());
ai, tanType, si, subscriptBuffer);
} else if (auto cai = dyn_cast<CopyAddrInst>(use->getUser())) {
// NOTE(review): same unchecked `dyn_cast` dereference as above.
auto literal = dyn_cast<IntegerLiteralInst>(iai->getIndex());
auto tanType = getRemappedTangentType(
auto subscriptBuffer = emitArrayTangentSubscript(
ai, tanType, adjointArray, fnRef,
genericSig, literal->getValue().getLimitedValue());
ai, cai, subscriptBuffer);
// Pullback handling for `apply`.
//
// Array-literal intrinsics are delegated to `visitArrayInitialization`. For
// any other call that has a `NestedApplyInfo` entry, the callee pullback is
// fetched from the pullback struct, called with the seed (the adjoint of the
// original result selected by `applyInfo.indices.source`), and each pullback
// result is accumulated into the adjoint of the corresponding original
// argument.
void visitApplyInst(ApplyInst *ai) {
// Handle array uninitialized allocation intrinsic specially.
if (isArrayLiteralIntrinsic(ai))
return visitArrayInitialization(ai);
// Replace a call to a function with a call to its pullback.
auto &nestedApplyInfo = getContext().getNestedApplyInfo();
auto applyInfoLookup = nestedApplyInfo.find(ai);
// If no `NestedApplyInfo` was found, then this task doesn't need to be
// differentiated.
if (applyInfoLookup == nestedApplyInfo.end()) {
// Must not be active.
assert(!getActivityInfo().isActive(ai, getIndices()));
auto applyInfo = applyInfoLookup->getSecond();
// Get the pullback.
auto *field = getPullbackInfo().lookUpLinearMapDecl(ai);
auto loc = ai->getLoc();
auto pullback = getPullbackStructElement(ai->getParent(), field);
// Get the original result of the `apply` instruction.
// `args` collects the arguments later passed to the pullback call.
SmallVector<SILValue, 8> args;
SmallVector<SILValue, 8> origDirectResults;
forEachApplyDirectResult(ai, [&](SILValue directResult) {
SmallVector<SILValue, 8> origAllResults;
collectAllActualResultsInTypeOrder(ai, origDirectResults, origAllResults);
assert(applyInfo.indices.source < origAllResults.size());
auto origResult = origAllResults[applyInfo.indices.source];
auto origNumIndRes = ai->getNumIndirectResults();
auto pullbackType =
// Get the seed (i.e. adjoint value of the original result).
SILValue seed;
auto *bb = ai->getParent();
if (origResult->getType().isObject()) {
// Object-typed result: materialize the adjoint value of the original result.
seed = materializeAdjoint(getAdjointValue(bb, origResult), loc);
} else {
// Address-typed result: the seed is the result's adjoint buffer.
seed = getAdjointBuffer(bb, origResult);
// Create allocations for pullback indirect results.
SmallVector<AllocStackInst *, 4> pullbackIndirectResults;
// Prefer the pre-reabstraction pullback type when one was recorded.
auto actualPullbackType = applyInfo.originalPullbackType
? *applyInfo.originalPullbackType
: pullbackType;
for (auto indRes : actualPullbackType->getIndirectFormalResults()) {
auto *alloc =
builder.createAllocStack(loc, remapType(indRes.getSILStorageType()));
// If callee pullback was reabstracted in VJP, reabstract callee pullback.
if (applyInfo.originalPullbackType) {
SILOptFunctionBuilder fb(getContext().getTransform());
auto *thunk = getOrCreateReabstractionThunk(
fb, getContext().getModule(), loc, &getPullback(),
pullbackType, *applyInfo.originalPullbackType);
auto *thunkRef = builder.createFunctionRef(loc, thunk);
// Wrap the pullback in the thunk via `partial_apply`.
pullback = builder.createPartialApply(
loc, thunkRef,
{pullback}, pullbackType->getCalleeConvention());
// Call the callee pullback.
auto *pullbackCall = builder.createApply(
loc, pullback, SubstitutionMap(), args, /*isNonThrowing*/ false);
builder.emitDestroyValueOperation(loc, pullback);
// Extract all results from `pullbackCall`.
SmallVector<SILValue, 8> dirResults;
extractAllElements(pullbackCall, builder, dirResults);
// Get all results in type-defined order.
SmallVector<SILValue, 8> allResults;
collectAllActualResultsInTypeOrder(pullbackCall, dirResults, allResults);
auto &s = getADDebugStream();
s << "All results of the nested pullback call:\n";
llvm::for_each(allResults, [&](SILValue v) { s << v; });
// Accumulate adjoints for original differentiation parameters.
// Pullback results are consumed in order, one per differentiation parameter.
auto allResultsIt = allResults.begin();
for (unsigned i : applyInfo.indices.parameters->getIndices()) {
// Parameter indices do not count indirect results, hence the offset.
auto origArg = ai->getArgument(origNumIndRes + i);
auto tan = *allResultsIt++;
if (tan->getType().isAddress()) {
addToAdjointBuffer(bb, origArg, tan, loc);
} else {
if (origArg->getType().isAddress()) {
// Direct tangent for an address-typed argument: spill the tangent to a
// temporary buffer so it can be accumulated into the adjoint buffer.
auto *tmpBuf = builder.createAllocStack(loc, tan->getType());
builder.emitStoreValueOperation(loc, tan, tmpBuf,
addToAdjointBuffer(bb, origArg, tmpBuf, loc);
builder.emitDestroyAddrAndFold(loc, tmpBuf);
builder.createDeallocStack(loc, tmpBuf);
else {
addAdjointValue(bb, origArg, makeConcreteAdjointValue(tan), loc);
// Destroy and deallocate pullback indirect results.
// Reverse order, so the most recently allocated slot is released first.
for (auto *alloc : llvm::reverse(pullbackIndirectResults)) {
builder.emitDestroyAddrAndFold(loc, alloc);
builder.createDeallocStack(loc, alloc);
/// Handle `struct` instruction.
/// Original: y = struct (x0, x1, x2, ...)
/// Adjoint: adj[x0] += struct_extract adj[y], #x0
/// adj[x1] += struct_extract adj[y], #x1
/// adj[x2] += struct_extract adj[y], #x2
/// ...
void visitStructInst(StructInst *si) {
// Dispatches on the kind of the adjoint of the `struct` result and propagates
// a per-field adjoint to each field operand.
auto *bb = si->getParent();
auto loc = si->getLoc();
auto *structDecl = si->getStructDecl();
auto av = getAdjointValue(bb, si);
switch (av.getKind()) {
// Zero adjoint: every stored property's operand receives a zero adjoint of
// its own remapped tangent type.
case AdjointValueKind::Zero:
for (auto *field : structDecl->getStoredProperties()) {
auto fv = si->getFieldValue(field);
addAdjointValue(bb, fv,
makeZeroAdjointValue(getRemappedTangentType(fv->getType())), loc);
// Concrete adjoint: destructure the tangent struct and hand each element to
// the matching field operand.
case AdjointValueKind::Concrete: {
auto adjStruct = materializeAdjointDirect(std::move(av), loc);
// Find the struct `TangentVector` type.
auto structTy = remapType(si->getType()).getASTType();
auto tangentVectorTy =
tangentVectorTy, ResilienceExpansion::Minimal)
auto *tangentVectorDecl =
auto *dti = builder.createDestructureStruct(si->getLoc(), adjStruct);
// Accumulate adjoints for the fields of the `struct` operand.
unsigned fieldIndex = 0;
for (auto it = structDecl->getStoredProperties().begin();
it != structDecl->getStoredProperties().end(); ++it, ++fieldIndex) {
VarDecl *field = *it;
// `@noDerivative` fields are tested for here; the statement taken when the
// test succeeds is not visible in this view.
if (field->getAttrs().hasAttribute<NoDerivativeAttr>())
// Find the corresponding field in the tangent space.
VarDecl *tanField = nullptr;
// If the tangent space is the original struct, the field is the same.
if (tangentVectorDecl == structDecl)
tanField = field;
// Otherwise, look up the field by name.
else {
auto tanFieldLookup =
if (tanFieldLookup.empty()) {
si, getInvoker(),
tangentVectorDecl->getNameStr(), field->getNameStr());
errorOccurred = true;
tanField = cast<VarDecl>(tanFieldLookup.front());
// NOTE(review): the destructured element is selected with `fieldIndex`, the
// position of `field` in the original struct, not a position derived from
// `tanField` -- confirm that the two orderings agree.
auto tanElt = dti->getResult(fieldIndex);
bb, si->getFieldValue(field),
makeConcreteAdjointValue(tanElt), si->getLoc());
case AdjointValueKind::Aggregate: {
// Note: All user-called initializations go through the calls to the
// initializer, and synthesized initializers only have one level of struct
// formation which will not result into any aggregate adjoint values.
llvm_unreachable("Aggregate adjoint values should not occur for `struct` "
/// Handle `struct_extract` instruction.
/// Original: y = struct_extract x, #field
/// Adjoint: adj[x] += struct (0, ..., #field': adj[y], ..., 0)
/// ^~~~~~~
/// field in tangent space corresponding to #field
void visitStructExtractInst(StructExtractInst *sei) {
// Extracting a `@noDerivative` field must never reach the pullback emitter.
assert(!sei->getField()->getAttrs().hasAttribute<NoDerivativeAttr>() &&
"`struct_extract` with `@noDerivative` field should not be "
"differentiated; activity analysis should not marked as varied");
auto *bb = sei->getParent();
auto structTy = remapType(sei->getOperand()->getType()).getASTType();
// NOTE(review): the initializers of the next three declarations are only
// partially visible in this view.
auto tangentVectorTy =
tangentVectorTy, ResilienceExpansion::Minimal)
auto tangentVectorSILTy =
auto *tangentVectorDecl =
// Find the corresponding field in the tangent space.
VarDecl *tanField = nullptr;
// If the tangent space is the original struct, then field is the same.
if (tangentVectorDecl == sei->getStructDecl())
tanField = sei->getField();
// Otherwise, look up the field by name.
else {
auto tanFieldLookup =
// An empty lookup result is diagnosed and recorded in `errorOccurred`.
if (tanFieldLookup.empty()) {
sei, getInvoker(),
errorOccurred = true;
tanField = cast<VarDecl>(tanFieldLookup.front());
// Accumulate adjoint for the `struct_extract` operand.
auto av = getAdjointValue(bb, sei);
switch (av.getKind()) {
// Zero adjoint of the extracted field: the whole operand gets a zero adjoint.
case AdjointValueKind::Zero:
addAdjointValue(bb, sei->getOperand(),
makeZeroAdjointValue(tangentVectorSILTy), sei->getLoc());
// Otherwise build an aggregate over the stored properties of the tangent
// struct; `tanField` is treated separately from the other fields.
case AdjointValueKind::Concrete:
case AdjointValueKind::Aggregate: {
SmallVector<AdjointValue, 8> eltVals;
for (auto *field : tangentVectorDecl->getStoredProperties()) {
if (field == tanField) {
} else {
// Compute the substituted, lowered type of this other field.
auto substMap = tangentVectorTy->getMemberSubstitutionMap(
field->getModuleContext(), field);
auto fieldTy = field->getType().subst(substMap);
auto fieldSILTy =
fieldTy, ResilienceExpansion::Minimal);
addAdjointValue(bb, sei->getOperand(),
makeAggregateAdjointValue(tangentVectorSILTy, eltVals),
/// Handle `tuple` instruction.
/// Original: y = tuple (x0, x1, x2, ...)
/// Adjoint: adj[x0] += tuple_extract adj[y], 0
/// ...
void visitTupleInst(TupleInst *ti) {
// Propagates the adjoint of the `tuple` result to its element operands.
// In every case, elements whose type has no tangent space are tested for via
// `getTangentSpace`; the statement taken for them is not visible in this view.
auto *bb = ti->getParent();
auto av = getAdjointValue(bb, ti);
switch (av.getKind()) {
case AdjointValueKind::Zero:
for (auto eltVal : ti->getElements()) {
if (!getTangentSpace(eltVal->getType().getASTType()))
addAdjointValue(bb, eltVal,
case AdjointValueKind::Concrete: {
auto val = av.getConcreteValue();
// `adjIdx` indexes the adjoint tuple's elements and advances only when an
// element is actually consumed from it.
unsigned adjIdx = 0;
auto elts = builder.createDestructureTuple(ti->getLoc(), val);
for (auto i : range(ti->getNumOperands())) {
if (!getTangentSpace(ti->getOperand(i)->getType().getASTType()))
// If the adjoint value is not tuple-typed, it is used as-is for the element;
// otherwise the next destructured element is taken.
auto adjElt = val;
if (val->getType().is<TupleType>())
adjElt = elts->getResult(adjIdx++);
addAdjointValue(bb, ti->getOperand(i),
makeConcreteAdjointValue(adjElt), ti->getLoc());
case AdjointValueKind::Aggregate:
// Aggregate adjoint: forward its elements one by one, in order.
unsigned adjIdx = 0;
for (auto i : range(ti->getElements().size())) {
if (!getTangentSpace(ti->getElement(i)->getType().getASTType()))
addAdjointValue(bb, ti->getElement(i), av.getAggregateElement(adjIdx++),
/// Handle `tuple_extract` instruction.
/// Original: y = tuple_extract x, <n>
/// Adjoint: adj[x] += tuple (0, 0, ..., adj[y], ..., 0, 0)
/// ^~~~~~
/// n'-th element, where n' is tuple tangent space
/// index corresponding to n
void visitTupleExtractInst(TupleExtractInst *tei) {
// Propagates the adjoint of the extracted element back to the tuple operand.
auto *bb = tei->getParent();
auto tupleTanTy = getRemappedTangentType(tei->getOperand()->getType());
auto av = getAdjointValue(bb, tei);
switch (av.getKind()) {
// Zero adjoint of the element: the whole operand gets a zero adjoint.
case AdjointValueKind::Zero:
addAdjointValue(bb, tei->getOperand(), makeZeroAdjointValue(tupleTanTy),
case AdjointValueKind::Aggregate:
case AdjointValueKind::Concrete: {
auto tupleTy = tei->getTupleType();
auto tupleTanTupleTy = tupleTanTy.getAs<TupleType>();
// If the operand's tangent type is not itself a tuple, the element adjoint
// is passed to the operand unchanged.
if (!tupleTanTupleTy) {
addAdjointValue(bb, tei->getOperand(), av, tei->getLoc());
// Otherwise collect one adjoint per tangent-tuple element.
// NOTE(review): the statements that fill `elements` are not visible in this
// view.
SmallVector<AdjointValue, 8> elements;
unsigned adjIdx = 0;
for (unsigned i : range(tupleTy->getNumElements())) {
if (!getTangentSpace(
if (tei->getFieldNo() == i)
// A single collected element is added directly rather than being wrapped in
// an aggregate.
if (elements.size() == 1) {
addAdjointValue(bb, tei->getOperand(), elements.front(), tei->getLoc());
addAdjointValue(bb, tei->getOperand(),
makeAggregateAdjointValue(tupleTanTy, elements), tei->getLoc());
/// Handle `destructure_tuple` instruction.
/// Original: (y0, ..., yn) = destructure_tuple x
/// Adjoint: adj[x].0 += adj[y0]
/// ...
/// adj[x].n += adj[yn]
void visitDestructureTupleInst(DestructureTupleInst *dti) {
// Gathers the adjoints of the destructured results into one aggregate adjoint
// for the tuple operand.
auto *bb = dti->getParent();
auto tupleTanTy = getRemappedTangentType(dti->getOperand()->getType());
SmallVector<AdjointValue, 8> adjValues;
for (auto origElt : dti->getResults()) {
// Results whose type has no tangent space are tested for here; the statement
// taken for them is not visible in this view.
if (!getTangentSpace(origElt->getType().getASTType()))
adjValues.push_back(getAdjointValue(bb, origElt));
addAdjointValue(bb, dti->getOperand(),
makeAggregateAdjointValue(tupleTanTy, adjValues),
/// Handle `load` or `load_borrow` instruction
/// Original: y = load/load_borrow x
/// Adjoint: adj[x] += adj[y]
void visitLoadOperation(SingleValueInstruction *inst) {
// Shared implementation for `load` and `load_borrow`; any other instruction
// kind trips the assertion.
assert(isa<LoadInst>(inst) || isa<LoadBorrowInst>(inst));
auto *bb = inst->getParent();
auto adjVal =
materializeAdjointDirect(getAdjointValue(bb, inst), inst->getLoc());
// Allocate a local buffer and store the adjoint value. This buffer will be
// used for accumulation into the adjoint buffer.
auto *localBuf = builder.createAllocStack(inst->getLoc(), adjVal->getType());
// A copy is stored so that `adjVal` itself is not consumed by the store.
auto copy = builder.emitCopyValueOperation(inst->getLoc(), adjVal);
builder.emitStoreValueOperation(inst->getLoc(), copy, localBuf,
// Accumulate the adjoint value in the local buffer into the adjoint buffer.
addToAdjointBuffer(bb, inst->getOperand(0), localBuf, inst->getLoc());
// Destroy the stored copy before releasing the temporary stack slot.
builder.emitDestroyAddr(inst->getLoc(), localBuf);
builder.createDeallocStack(inst->getLoc(), localBuf);
void visitLoadInst(LoadInst *li) { visitLoadOperation(li); }
void visitLoadBorrowInst(LoadBorrowInst *lbi) { visitLoadOperation(lbi); }
/// Handle `store` or `store_borrow` instruction.
/// Original: store/store_borrow x to y
/// Adjoint: adj[x] += load adj[y]; adj[y] = 0
void visitStoreOperation(SILBasicBlock *bb, SILLocation loc,
SILValue origSrc, SILValue origDest) {
// Shared implementation for `store` and `store_borrow`: the adjoint of the
// destination is moved into the adjoint of the source, then the destination's
// adjoint buffer is re-initialized to zero.
auto &adjBuf = getAdjointBuffer(bb, origDest);
auto bufType = remapType(adjBuf->getType());
// `Take` moves the value out of the buffer.
auto adjVal = builder.emitLoadValueOperation(
loc, adjBuf, LoadOwnershipQualifier::Take);
addAdjointValue(bb, origSrc, makeConcreteAdjointValue(adjVal), loc);
// adj[y] = 0: write a fresh zero into the buffer that was just taken from.
emitZeroIndirect(bufType.getASTType(), adjBuf, loc);
// Visitor entry point for `store`.
// NOTE(review): the head of the call these arguments belong to is not visible
// in this view; the argument list matches `visitStoreOperation`'s parameters
// (block, location, source, destination).
void visitStoreInst(StoreInst *si) {
si->getParent(), si->getLoc(), si->getSrc(), si->getDest());
// Visitor entry point for `store_borrow`.
// NOTE(review): the head of the call these arguments belong to is not visible
// in this view; the argument list matches `visitStoreOperation`'s parameters
// (block, location, source, destination).
void visitStoreBorrowInst(StoreBorrowInst *sbi) {
sbi->getParent(), sbi->getLoc(), sbi->getSrc(), sbi->getDest());
/// Handle `copy_addr` instruction.
/// Original: copy_addr x to y
/// Adjoint: adj[x] += adj[y]; adj[y] = 0
void visitCopyAddrInst(CopyAddrInst *cai) {
auto *bb = cai->getParent();
auto &adjDest = getAdjointBuffer(bb, cai->getDest());
auto destType = remapType(adjDest->getType());
// adj[x] += adj[y]
addToAdjointBuffer(bb, cai->getSrc(), adjDest, cai->getLoc());
// adj[y] = 0: destroy the old contents, then re-initialize the buffer to zero.
builder.emitDestroyAddrAndFold(cai->getLoc(), adjDest);
emitZeroIndirect(destType.getASTType(), adjDest, cai->getLoc());
/// Handle `copy_value` instruction.
/// Original: y = copy_value x
/// Adjoint: adj[x] += adj[y]
void visitCopyValueInst(CopyValueInst *cvi) {
// The copy's adjoint is forwarded unchanged to the copied operand.
auto *bb = cvi->getParent();
auto adj = getAdjointValue(bb, cvi);
addAdjointValue(bb, cvi->getOperand(), adj, cvi->getLoc());
/// Handle `begin_borrow` instruction.
/// Original: y = begin_borrow x
/// Adjoint: adj[x] += adj[y]
void visitBeginBorrowInst(BeginBorrowInst *bbi) {
// The borrow's adjoint is forwarded unchanged to the borrowed operand.
auto *bb = bbi->getParent();
auto adj = getAdjointValue(bb, bbi);
addAdjointValue(bb, bbi->getOperand(), adj, bbi->getLoc());
/// Handle `begin_access` instruction.
/// Original: y = begin_access x
/// Adjoint: nothing
void visitBeginAccessInst(BeginAccessInst *bai) {
// No adjoint is emitted here; the visitor only diagnoses `modify` accesses
// whose source is a `global_addr` or a `project_box`.
// NOTE(review): the diagnostic arguments of both error calls are not visible
// in this view.
// Check for non-differentiable writes.
if (bai->getAccessKind() == SILAccessKind::Modify) {
if (auto *gai = dyn_cast<GlobalAddrInst>(bai->getSource())) {
getContext().emitNondifferentiabilityError(bai, getInvoker(),
errorOccurred = true;
if (auto *pbi = dyn_cast<ProjectBoxInst>(bai->getSource())) {
getContext().emitNondifferentiabilityError(bai, getInvoker(),
errorOccurred = true;
/// Handle `unconditional_checked_cast_addr` instruction.
/// Original: y = unconditional_checked_cast_addr x
/// Adjoint: adj[x] += unconditional_checked_cast_addr adj[y]
void visitUnconditionalCheckedCastAddrInst(
UnconditionalCheckedCastAddrInst *uccai) {
auto *bb = uccai->getParent();
auto &adjDest = getAdjointBuffer(bb, uccai->getDest());
auto &adjSrc = getAdjointBuffer(bb, uccai->getSrc());
auto destType = remapType(adjDest->getType());
// Temporary buffer, typed like the source adjoint, that receives the cast
// destination adjoint.
auto castBuf = builder.createAllocStack(uccai->getLoc(), adjSrc->getType());
// NOTE(review): the head of the call these arguments belong to is not visible
// in this view.
uccai->getLoc(), adjDest, adjDest->getType().getASTType(), castBuf,
// adj[x] += cast result, then release the temporary.
addToAdjointBuffer(bb, uccai->getSrc(), castBuf, uccai->getLoc());
builder.emitDestroyAddrAndFold(uccai->getLoc(), castBuf);
builder.createDeallocStack(uccai->getLoc(), castBuf);
// Re-initialize the destination adjoint buffer to zero.
emitZeroIndirect(destType.getASTType(), adjDest, uccai->getLoc());
// Body of the `NOT_DIFFERENTIABLE` helper macro (its `#define` line is not
// visible in this view): generates a `visit<INST>Inst` method that emits the
// non-differentiability diagnostic `diag::DIAG` for the instruction, records
// the failure in `errorOccurred`, and returns. It is instantiated below for
// `RefElementAddr`.
void visit##INST##Inst(INST##Inst *inst) { \
getContext().emitNondifferentiabilityError( \
inst, getInvoker(), diag::DIAG); \
errorOccurred = true; \
return; \
NOT_DIFFERENTIABLE(RefElementAddr, autodiff_class_property_not_supported)
// Generates an empty `visit<INST>Inst` method for instructions that need no
// pullback code.
#define NO_ADJOINT(INST)                                                       \
  void visit##INST##Inst(INST##Inst *) {}
// Terminators.
// Buffer projection.
// Memory allocation/access.
// Debugging/reference counting instructions.
// Value ownership.
} // end anonymous namespace
// Creates a symbolic zero adjoint value of the remapped `type`, allocated
// from the emitter's `allocator`.
AdjointValue PullbackEmitter::makeZeroAdjointValue(SILType type) {
return AdjointValue::createZero(allocator, remapType(type));
// Wraps an already-emitted SIL `value` as a concrete adjoint value, allocated
// from the emitter's `allocator`.
PullbackEmitter::makeConcreteAdjointValue(SILValue value) {
return AdjointValue::createConcrete(allocator, value);
// Creates a symbolic aggregate adjoint value of the remapped `type` from
// `elements`, allocated from the emitter's `allocator`.
template<typename EltRange>
AdjointValue PullbackEmitter::makeAggregateAdjointValue(
SILType type, EltRange elements) {
return AdjointValue::createAggregate(allocator, remapType(type), elements);
// Turns a symbolic adjoint value into a SIL value:
//  - Zero: emits a zero of the adjoint's type.
//  - Aggregate: materializes each element recursively, copies it, and builds a
//    `tuple` or `struct` from the copies.
//  - Concrete: returns the wrapped value as-is.
// Values created for the Zero and Aggregate cases are passed through
// `recordTemporary`.
SILValue PullbackEmitter::materializeAdjointDirect(
AdjointValue val, SILLocation loc) {
LLVM_DEBUG(getADDebugStream() <<
"Materializing adjoints for " << val << '\n');
switch (val.getKind()) {
case AdjointValueKind::Zero:
return recordTemporary(emitZeroDirect(val.getType().getASTType(), loc));
case AdjointValueKind::Aggregate: {
SmallVector<SILValue, 8> elements;
for (auto i : range(val.getNumAggregateElements())) {
auto eltVal = materializeAdjointDirect(val.getAggregateElement(i), loc);
// The aggregate is built from copies of the element values.
elements.push_back(builder.emitCopyValueOperation(loc, eltVal));
// Tuple-typed aggregates become `tuple`; anything else becomes `struct`.
if (val.getType().is<TupleType>())
return recordTemporary(
builder.createTuple(loc, val.getType(), elements));
return recordTemporary(
builder.createStruct(loc, val.getType(), elements));
case AdjointValueKind::Concrete:
return val.getConcreteValue();
// Returns a SIL value for `val`: the wrapped value when `val` is concrete,
// otherwise the result of `materializeAdjointDirect`.
SILValue PullbackEmitter::materializeAdjoint(AdjointValue val,
SILLocation loc) {
if (val.isConcrete()) {
<< "Materializing adjoint: Value is concrete.\n");
return val.getConcreteValue();
LLVM_DEBUG(getADDebugStream() << "Materializing adjoint: Value is "
"non-concrete. Materializing directly.\n");
return materializeAdjointDirect(val, loc);
// Materializes the symbolic adjoint `val` into the buffer `destBufferAccess`,
// dispatching on the kind of `val`.
void PullbackEmitter::materializeAdjointIndirect(
AdjointValue val, SILValue destBufferAccess, SILLocation loc) {
switch (val.getKind()) {
/// Given a `%buf : *T`, emit instructions that produce a zero or an aggregate
/// of zeros of the expected type. When `T` conforms to
/// `AdditiveArithmetic`, we emit a call to `AdditiveArithmetic.zero`. When
/// `T` is a builtin float, we emit a `float_literal` instruction.
/// Otherwise, we assert that `T` must be an aggregate where each element
/// conforms to `AdditiveArithmetic` or is a builtin float. We expect to emit
/// a zero for each element and use the appropriate aggregate constructor
/// instruction (in this case, `tuple`) to produce a tuple. But currently,
/// since we need indirect passing for aggregate instruction, we just use
/// `tuple_element_addr` to get element buffers and write elements to them.
case AdjointValueKind::Zero:
emitZeroIndirect(val.getSwiftType(), destBufferAccess, loc);
/// Given a `%buf : *(T0, T1, T2, ...)` or `%buf : *Struct` recursively emit
/// instructions to materialize the symbolic tuple or struct, filling the
/// buffer.
case AdjointValueKind::Aggregate: {
if (auto *tupTy = val.getSwiftType()->getAs<TupleType>()) {
// Tuple: project each element address and fill it from the matching
// aggregate element.
for (auto idx : range(val.getNumAggregateElements())) {
auto eltTy = SILType::getPrimitiveAddressType(
auto *eltBuf =
builder.createTupleElementAddr(loc, destBufferAccess, idx, eltTy);
val.getAggregateElement(idx), eltBuf, loc);
} else if (auto *structDecl =
val.getSwiftType()->getStructOrBoundGenericStruct()) {
// Struct: walk stored properties and aggregate elements in lockstep.
auto fieldIt = structDecl->getStoredProperties().begin();
for (unsigned i = 0; fieldIt != structDecl->getStoredProperties().end();
++fieldIt, ++i) {
auto eltBuf =
builder.createStructElementAddr(loc, destBufferAccess, *fieldIt);
val.getAggregateElement(i), eltBuf, loc);
} else {
llvm_unreachable("Not an aggregate type");
/// Value is already materialized!
case AdjointValueKind::Concrete:
// Store the existing value into the destination buffer.
auto concreteVal = val.getConcreteValue();
builder.emitStoreValueOperation(loc, concreteVal, destBufferAccess,
// Initializes the buffer `bufferAccess` with the zero of `type`, dispatching
// on the kind of `type`'s tangent space. A type with no tangent space trips
// the assertion.
void PullbackEmitter::emitZeroIndirect(CanType type, SILValue bufferAccess,
SILLocation loc) {
auto tangentSpace = getTangentSpace(type);
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector:
emitZeroIntoBuffer(builder, type, bufferAccess, loc);
case VectorSpace::Kind::Tuple: {
// Tuple: handle each element address separately.
auto tupleType = tangentSpace->getTuple();
// NOTE(review): `zeroElements` is not referenced in the visible lines.
SmallVector<SILValue, 8> zeroElements;
for (unsigned i : range(tupleType->getNumElements())) {
auto eltAddr = builder.createTupleElementAddr(loc, bufferAccess, i);
eltAddr, loc);
case VectorSpace::Kind::Function: {
"Unimplemented: Emit thunks for abstracting zero initialization");
// Produces a zero of `type` as a direct value: a temporary stack buffer is
// zero-initialized via `emitZeroIndirect`, the value is moved out with a
// `Take` load, and the buffer is deallocated.
SILValue PullbackEmitter::emitZeroDirect(CanType type, SILLocation loc) {
auto silType = getModule().Types.getLoweredLoadableType(
type, ResilienceExpansion::Minimal, getModule());
auto *buffer = builder.createAllocStack(loc, silType);
emitZeroIndirect(type, buffer, loc);
auto loaded = builder.emitLoadValueOperation(
loc, buffer, LoadOwnershipQualifier::Take);
// The value was taken, so only the stack slot remains to be released.
builder.createDeallocStack(loc, buffer);
return loaded;
// Adds two symbolic adjoint values, dispatching on the kinds of `lhs` and
// `rhs`. Zero operands are absorbed, concrete pairs are summed with
// `accumulateDirect`, and aggregates are combined element by element.
PullbackEmitter::accumulateAdjointsDirect(AdjointValue lhs, AdjointValue rhs,
SILLocation loc) {
<< "Materializing adjoint directly.\nLHS: " << lhs
<< "\nRHS: " << rhs << '\n');
switch (lhs.getKind()) {
// x
case AdjointValueKind::Concrete: {
auto lhsVal = lhs.getConcreteValue();
switch (rhs.getKind()) {
// x + y
case AdjointValueKind::Concrete: {
auto rhsVal = rhs.getConcreteValue();
auto sum = recordTemporary(accumulateDirect(lhsVal, rhsVal, loc));
return makeConcreteAdjointValue(sum);
// x + 0 => x
case AdjointValueKind::Zero:
return lhs;
// x + (y, z) => (x.0 + y, x.1 + z)
case AdjointValueKind::Aggregate:
SmallVector<AdjointValue, 8> newElements;
auto lhsTy = lhsVal->getType().getASTType();
// A copy of the concrete value is destructured, not the value itself.
auto lhsValCopy = builder.emitCopyValueOperation(loc, lhsVal);
if (auto *tupTy = lhsTy->getAs<TupleType>()) {
auto elts = builder.createDestructureTuple(loc, lhsValCopy);
[this](SILValue result) { recordTemporary(result); });
for (auto i : indices(elts->getResults())) {
auto rhsElt = rhs.getAggregateElement(i);
makeConcreteAdjointValue(elts->getResult(i)), rhsElt, loc));
} else if (auto *structDecl = lhsTy->getStructOrBoundGenericStruct()) {
auto elts =
builder.createDestructureStruct(lhsVal.getLoc(), lhsValCopy);
[this](SILValue result) { recordTemporary(result); });
for (unsigned i : indices(elts->getResults())) {
auto rhsElt = rhs.getAggregateElement(i);
makeConcreteAdjointValue(elts->getResult(i)), rhsElt, loc));
} else {
llvm_unreachable("Not an aggregate type");
return makeAggregateAdjointValue(lhsVal->getType(), newElements);
// 0
case AdjointValueKind::Zero:
// 0 + x => x
return rhs;
// (x, y)
case AdjointValueKind::Aggregate:
switch (rhs.getKind()) {
// (x, y) + z => (x + z.0, y + z.1)
case AdjointValueKind::Concrete:
// (x, y) + 0 => (x, y)
case AdjointValueKind::Zero:
return lhs;
// (x, y) + (z, w) => (x + z, y + w)
case AdjointValueKind::Aggregate: {
SmallVector<AdjointValue, 8> newElements;
for (auto i : range(lhs.getNumAggregateElements()))
return makeAggregateAdjointValue(lhs.getType(), newElements);
// Emits code that adds two object-typed adjoint values of identical type and
// returns the sum. Vector tangent spaces are summed through temporary buffers
// and the indirect accumulator; tuple tangent spaces are summed element by
// element.
SILValue PullbackEmitter::accumulateDirect(SILValue lhs, SILValue rhs,
SILLocation loc) {
// TODO: Optimize for the case when lhs == rhs.
LLVM_DEBUG(getADDebugStream() <<
"Emitting adjoint accumulation for lhs: " << lhs <<
" and rhs: " << rhs << "\n");
assert(lhs->getType() == rhs->getType() && "Adjoints must have equal types!");
assert(lhs->getType().isObject() && rhs->getType().isObject() &&
"Adjoint types must be both object types!");
auto adjointTy = lhs->getType();
auto adjointASTTy = adjointTy.getASTType();
auto tangentSpace = getTangentSpace(adjointASTTy);
// Copies of both operands are what the code below stores or destructures.
auto lhsCopy = builder.emitCopyValueOperation(loc, lhs);
auto rhsCopy = builder.emitCopyValueOperation(loc, rhs);
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector: {
// Allocate buffers for inputs and output.
auto *resultBuf = builder.createAllocStack(loc, adjointTy);
auto *lhsBuf = builder.createAllocStack(loc, adjointTy);
auto *rhsBuf = builder.createAllocStack(loc, adjointTy);
// Initialize input buffers.
builder.emitStoreValueOperation(loc, lhsCopy, lhsBuf,
builder.emitStoreValueOperation(loc, rhsCopy, rhsBuf,
accumulateIndirect(resultBuf, lhsBuf, rhsBuf, loc);
// Destroy the operand copies held by the input buffers.
builder.emitDestroyAddr(loc, lhsBuf);
builder.emitDestroyAddr(loc, rhsBuf);
// Deallocate input buffers.
// Deallocation runs in reverse allocation order (rhs, lhs, then result).
builder.createDeallocStack(loc, rhsBuf);
builder.createDeallocStack(loc, lhsBuf);
auto val = builder.emitLoadValueOperation(
loc, resultBuf, LoadOwnershipQualifier::Take);
// Deallocate result buffer.
builder.createDeallocStack(loc, resultBuf);
return val;
case VectorSpace::Kind::Tuple: {
// Tuple: destructure both copies and sum matching elements recursively.
SmallVector<SILValue, 8> adjElements;
auto lhsElts = builder.createDestructureTuple(loc, lhsCopy)->getResults();
auto rhsElts = builder.createDestructureTuple(loc, rhsCopy)->getResults();
for (auto zipped : llvm::zip(lhsElts, rhsElts))
accumulateDirect(std::get<0>(zipped), std::get<1>(zipped), loc));
return builder.createTuple(loc, adjointTy, adjElements);
case VectorSpace::Kind::Function: {
"Unimplemented: Emit thunks for abstracting adjoint accumulation");
// Out-of-place indirect accumulation: emits code that writes the sum of the
// values in `lhsBufAccess` and `rhsBufAccess` into `resultBufAccess`. Both
// inputs must be addresses of the same type. Vector tangent spaces call the
// `AdditiveArithmetic` `+` witness; tuple tangent spaces recurse per element.
void PullbackEmitter::accumulateIndirect(
SILValue resultBufAccess, SILValue lhsBufAccess, SILValue rhsBufAccess,
SILLocation loc) {
// TODO: Optimize for the case when lhs == rhs.
assert(lhsBufAccess->getType() == rhsBufAccess->getType() &&
"Adjoint values must have same type!");
assert(lhsBufAccess->getType().isAddress() &&
rhsBufAccess->getType().isAddress() &&
"Adjoint values must both have address types!");
auto adjointTy = lhsBufAccess->getType();
auto adjointASTTy = adjointTy.getASTType();
auto *swiftMod = getModule().getSwiftModule();
auto tangentSpace = adjointASTTy->getAutoDiffAssociatedTangentSpace(
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector: {
auto *proto = getContext().getAdditiveArithmeticProtocol();
auto *combinerFuncDecl = getContext().getPlusDecl();
// Call the combiner function and return.
// The conformance is looked up in the module that declares the tangent
// space's nominal type when there is one, otherwise in the current module.
auto adjointParentModule = tangentSpace->getNominal()
? tangentSpace->getNominal()->getModuleContext()
: getModule().getSwiftModule();
auto confRef = adjointParentModule->lookupConformance(adjointASTTy,
assert(confRef.hasValue() && "Missing conformance to `AdditiveArithmetic`");
SILDeclRef declRef(combinerFuncDecl, SILDeclRef::Kind::Func);
auto silFnTy = getContext().getTypeConverter().getConstantType(declRef);
// %0 = witness_method @+
auto witnessMethod = builder.createWitnessMethod(loc, adjointASTTy,
*confRef, declRef,
auto subMap = SubstitutionMap::getProtocolSubstitutions(
proto, adjointASTTy, *confRef);
// %1 = metatype $T.Type
auto metatypeType =
CanMetatypeType::get(adjointASTTy, MetatypeRepresentation::Thick);
auto metatypeSILType = SILType::getPrimitiveObjectType(metatypeType);
auto metatype = builder.createMetatype(loc, metatypeSILType);
// %2 = apply $0(%result, %new, %old, %1)
// Note the argument order: `rhsBufAccess` is passed before `lhsBufAccess`.
builder.createApply(loc, witnessMethod, subMap,
{resultBufAccess, rhsBufAccess, lhsBufAccess, metatype},
/*isNonThrowing*/ false);
builder.emitDestroyValueOperation(loc, witnessMethod);
case VectorSpace::Kind::Tuple: {
// Tuple: accumulate each element through its projected addresses.
auto tupleType = tangentSpace->getTuple();
for (unsigned i : range(tupleType->getNumElements())) {
auto *destAddr = builder.createTupleElementAddr(loc, resultBufAccess, i);
auto *eltAddrLHS = builder.createTupleElementAddr(loc, lhsBufAccess, i);
auto *eltAddrRHS = builder.createTupleElementAddr(loc, rhsBufAccess, i);
accumulateIndirect(destAddr, eltAddrLHS, eltAddrRHS, loc);
case VectorSpace::Kind::Function: {
"Unimplemented: Emit thunks for abstracting adjoint value "
// In-place indirect accumulation: emits code that adds the value in
// `rhsAccess` to the value in `lhsDestAccess`. Both values must belong to the
// pullback function. Vector tangent spaces call the `AdditiveArithmetic` `+=`
// witness; tuple tangent spaces recurse per element.
void PullbackEmitter::accumulateIndirect(SILValue lhsDestAccess,
SILValue rhsAccess, SILLocation loc) {
assert(lhsDestAccess->getType().isAddress() &&
assert(lhsDestAccess->getFunction() == &getPullback());
assert(rhsAccess->getFunction() == &getPullback());
auto type = lhsDestAccess->getType();
auto astType = type.getASTType();
auto *swiftMod = getModule().getSwiftModule();
auto tangentSpace = astType->getAutoDiffAssociatedTangentSpace(
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector: {
auto *proto = getContext().getAdditiveArithmeticProtocol();
auto *accumulatorFuncDecl = getContext().getPlusEqualDecl();
// Call the combiner function and return.
// Unlike the three-buffer overload, the conformance is always looked up in
// the current Swift module here.
auto confRef = swiftMod->lookupConformance(astType, proto);
assert(confRef.hasValue() && "Missing conformance to `AdditiveArithmetic`");
SILDeclRef declRef(accumulatorFuncDecl, SILDeclRef::Kind::Func);
auto silFnTy = getContext().getTypeConverter().getConstantType(declRef);
// %0 = witness_method @+=
auto witnessMethod =
builder.createWitnessMethod(loc, astType, *confRef, declRef, silFnTy);
auto subMap =
SubstitutionMap::getProtocolSubstitutions(proto, astType, *confRef);
// %1 = metatype $T.Type
auto metatypeType =
CanMetatypeType::get(astType, MetatypeRepresentation::Thick);
auto metatypeSILType = SILType::getPrimitiveObjectType(metatypeType);
auto metatype = builder.createMetatype(loc, metatypeSILType);
// %2 = apply $0(%lhs, %rhs, %1)
builder.createApply(loc, witnessMethod, subMap,
{lhsDestAccess, rhsAccess, metatype},
/*isNonThrowing*/ false);
builder.emitDestroyValueOperation(loc, witnessMethod);
case VectorSpace::Kind::Tuple: {
// Tuple: accumulate each element through its projected addresses.
auto tupleType = tangentSpace->getTuple();
for (unsigned i : range(tupleType->getNumElements())) {
auto *destAddr = builder.createTupleElementAddr(loc, lhsDestAccess, i);
auto *eltAddrRHS = builder.createTupleElementAddr(loc, rhsAccess, i);
accumulateIndirect(destAddr, eltAddrRHS, loc);
case VectorSpace::Kind::Function: {
"Unimplemented: Emit thunks for abstracting adjoint value "
// Generates the VJP: clones the original function's body into `vjp`, ends the
// borrows of trampolined `@guaranteed` phi arguments, then generates the
// pullback. Returns true when an error occurred, and `errorOccurred`
// otherwise.
bool VJPEmitter::run() {
<< "Cloning original @" << original->getName()
<< " to vjp @" << vjp->getName() << '\n');
// Create entry BB and arguments.
auto *entry = vjp->createBasicBlock();
// Clone.
SmallVector<SILValue, 4> entryArgs(entry->getArguments().begin(),
cloneFunctionBody(original, entry, entryArgs);
// If errors occurred, back out.
if (errorOccurred)
return true;
// Each `@guaranteed` trampoline argument needs to have a lifetime-ending use
// past its destination argument's lifetime-ending uses (aka. `end_borrow`).
// `trampolinedGuaranteedPhiArguments` tracks all `@guaranteed` trampoline
// arguments. We emit an `end_borrow` immediately past each destination
// argument's lifetime-ending uses.
for (auto &trampolinedArgPair : trampolinedGuaranteedPhiArguments) {
for (auto *destArgUse : trampolinedArgPair.destinationArgument->getUses()) {
if (auto *lifetimeEnd = dyn_cast<EndBorrowInst>(destArgUse->getUser())) {
lifetimeEnd->getLoc(), trampolinedArgPair.trampolineArgument);
// Generate pullback code.
// NOTE(review): the local variable below has the same name as its type, and
// the condition of the following `if` is garbled in this view.
PullbackEmitter PullbackEmitter(*this);
if ( {
errorOccurred = true;
return true;
LLVM_DEBUG(getADDebugStream() << "Generated VJP for "
<< original->getName() << ":\n" << *vjp);
return errorOccurred;
// `[differentiable]` attribute processing
/// Declares an external derivative function for `original`.
///
/// The derivative function type is computed from `original`'s lowered
/// function type, the indices of `attr`, and `kind`. The declaration is
/// created under the given `name` with `SILLinkage::PublicExternal` linkage,
/// is given a debug scope at `original`'s location, and is returned.
SILFunction *
SILFunction *original, SILDifferentiableAttr *attr, StringRef name,
AutoDiffDerivativeFunctionKind kind) {
auto &module = getModule();
auto &indices = attr->getIndices();
auto originalTy = original->getLoweredFunctionType();
auto originalLoc = original->getLocation();
auto assocGenSig = getDerivativeGenericSignature(attr, original);
auto derivativeFnTy = originalTy->getAutoDiffDerivativeFunctionType(
indices.parameters, indices.source, kind, module.Types,
LookUpConformanceInModule(module.getSwiftModule()), assocGenSig);
SILOptFunctionBuilder fb(getTransform());
// Create external function declaration.
auto *derivativeFn = fb.createFunction(
SILLinkage::PublicExternal, name, derivativeFnTy,
/*genericEnv*/ nullptr, originalLoc, original->isBare(), IsNotTransparent,
original->isSerialized(), original->isDynamicallyReplaceable());
// Note: Setting debug scope prevents crashes during later transforms.
derivativeFn->setDebugScope(new (module) SILDebugScope(originalLoc, derivativeFn));
return derivativeFn;
/// Creates and returns an empty VJP function for `original`.
///
/// Computes the VJP's name, generic signature and environment, function type
/// (from the indices of `attr`), and linkage (from `original`'s linkage and
/// `isExported`), then creates the function and sets its debug scope.
static SILFunction *createEmptyVJP(
ADContext &context, SILFunction *original, SILDifferentiableAttr *attr,
bool isExported) {
auto &s = getADDebugStream();
s << "Creating VJP:\n\t";
s << "Original type: " << original->getLoweredFunctionType() << "\n\t";
auto &module = context.getModule();
auto originalTy = original->getLoweredFunctionType();
auto indices = attr->getIndices();
// === Create an empty VJP. ===
Mangle::ASTMangler mangler;
auto vjpName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffDerivativeFunctionKind::VJP, indices))
auto vjpGenericSig = getDerivativeGenericSignature(attr, original);
// RAII that pushes the VJP generic signature (`vjpGenericSig`) to
// `module.Types` so that calls to `module.Types.getTypeLowering()` below
// will know the VJP's generic parameter types.
Lowering::GenericContextScope genericContextScope(
module.Types, vjpGenericSig);
// A null generic signature yields a null generic environment.
auto *vjpGenericEnv = vjpGenericSig
? vjpGenericSig->getGenericEnvironment()
: nullptr;
auto vjpType = originalTy->getAutoDiffDerivativeFunctionType(
indices.parameters, indices.source, AutoDiffDerivativeFunctionKind::VJP,
module.Types, LookUpConformanceInModule(module.getSwiftModule()),
SILOptFunctionBuilder fb(context.getTransform());
auto linkage = autodiff::getAutoDiffDerivativeFunctionLinkage(
original->getLinkage(), isExported);
auto *vjp = fb.createFunction(linkage, vjpName, vjpType, vjpGenericEnv,
original->getLocation(), original->isBare(),
IsNotTransparent, original->isSerialized(),
vjp->setDebugScope(new (module) SILDebugScope(original->getLocation(), vjp));
LLVM_DEBUG(llvm::dbgs() << "VJP type: " << vjp->getLoweredFunctionType()
<< "\n");
return vjp;
/// Creates and returns an empty JVP function for `original`.
///
/// Computes the JVP's name, generic signature and environment, function type
/// (from the indices of `attr`), and linkage (from `original`'s linkage and
/// `isExported`), then creates the function and sets its debug scope.
static SILFunction *createEmptyJVP(
ADContext &context, SILFunction *original, SILDifferentiableAttr *attr,
bool isExported) {
auto &s = getADDebugStream();
s << "Creating JVP:\n\t";
s << "Original type: " << original->getLoweredFunctionType() << "\n\t";
auto &module = context.getModule();
auto originalTy = original->getLoweredFunctionType();
auto indices = attr->getIndices();
// === Create an empty JVP. ===
Mangle::ASTMangler mangler;
auto jvpName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffDerivativeFunctionKind::JVP, indices))
auto jvpGenericSig = getDerivativeGenericSignature(attr, original);
// RAII that pushes the JVP generic signature (`jvpGenericSig`) to
// `module.Types` so that calls to `module.Types.getTypeLowering()` below
// will know the JVP's generic parameter types.
Lowering::GenericContextScope genericContextScope(
module.Types, jvpGenericSig);
// A null generic signature yields a null generic environment.
auto *jvpGenericEnv = jvpGenericSig
? jvpGenericSig->getGenericEnvironment()
: nullptr;
auto jvpType = originalTy->getAutoDiffDerivativeFunctionType(
indices.parameters, indices.source,
AutoDiffDerivativeFunctionKind::JVP, module.Types,
LookUpConformanceInModule(module.getSwiftModule()), jvpGenericSig);
SILOptFunctionBuilder fb(context.getTransform());
auto linkage = autodiff::getAutoDiffDerivativeFunctionLinkage(
original->getLinkage(), isExported);
auto *jvp = fb.createFunction(linkage, jvpName, jvpType, jvpGenericEnv,
original->getLocation(), original->isBare(),
IsNotTransparent, original->isSerialized(),
jvp->setDebugScope(new (module) SILDebugScope(original->getLocation(), jvp));
LLVM_DEBUG(llvm::dbgs() << "JVP type: " << jvp->getLoweredFunctionType()
<< "\n");
return jvp;
/// Processes a `[differentiable]` attribute on `original`.
///
/// Looks up the JVP and VJP when a name is available (either specified by the
/// attribute or derived because `original` is an external declaration),
/// declaring an external derivative function if the lookup fails. Whichever
/// derivative function is still missing afterwards is synthesized.
///
/// Returns true on error.
bool ADContext::processDifferentiableAttribute(
SILFunction *original, SILDifferentiableAttr *attr,
DifferentiationInvoker invoker) {
auto &module = getModule();
// Try to look up JVP only if attribute specifies JVP name or if original
// function is an external declaration. If JVP function cannot be found,
// create an external JVP reference.
StringRef jvpName;
SILFunction *jvp = nullptr;
if (attr->hasJVP()) {
jvpName = attr->getJVPName();
} else if (original->isExternalDeclaration()) {
Mangle::ASTMangler mangler;
jvpName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffDerivativeFunctionKind::JVP,
if (!jvpName.empty()) {
jvp = module.lookUpFunction(jvpName);
if (!jvp)
jvp = declareExternalDerivativeFunction(
original, attr, jvpName, AutoDiffDerivativeFunctionKind::JVP);
// If differentiation is triggered by `[differentiable]`, derivative function
// should share linkage of original function.
auto isDerivativeFnExported =
invoker.getKind() ==
// Try to look up VJP only if attribute specifies VJP name or if original
// function is an external declaration. If VJP function cannot be found,
// create an external VJP reference.
StringRef vjpName;
SILFunction *vjp = nullptr;
if (attr->hasVJP()) {
vjpName = attr->getVJPName();
} else if (original->isExternalDeclaration()) {
Mangle::ASTMangler mangler;
vjpName = original->getASTContext().getIdentifier(
original->getName(), AutoDiffDerivativeFunctionKind::VJP,
if (!vjpName.empty()) {
vjp = module.lookUpFunction(vjpName);
if (!vjp)
vjp = declareExternalDerivativeFunction(
original, attr, vjpName, AutoDiffDerivativeFunctionKind::VJP);
// If the JVP doesn't exist, need to synthesize it.
if (!jvp) {
// Diagnose:
// - Functions with no return.
// - Functions with unsupported control flow.
// These diagnostics only run here when JVP generation is enabled.
if (RunJVPGeneration && (diagnoseNoReturn(*this, original, invoker) ||
diagnoseUnsupportedControlFlow(*this, original, invoker)))
return true;
jvp = createEmptyJVP(*this, original, attr, isDerivativeFnExported);
// For now, only do JVP generation if the flag is enabled and if custom VJP
// does not exist. If custom VJP exists but custom JVP does not, skip JVP
// generation because generated JVP may not match semantics of custom VJP.
// Instead, create an empty JVP.
if (RunJVPGeneration && !vjp) {
// JVP and differential generation do not currently support functions with
// multiple basic blocks.
if (original->getBlocks().size() > 1) {
original->getLocation().getSourceLoc(), invoker,
return true;
JVPEmitter emitter(*this, original, attr, jvp, invoker);
if (
return true;
} else {
<< "Generating empty JVP for original @"
<< original->getName() << '\n');
// Create empty JVP body since custom VJP exists.
auto *entry = jvp->createBasicBlock();
SILBuilder builder(entry);
auto loc = jvp->getLocation();
// Destroy all owned arguments.
for (auto *arg : entry->getArguments())
if (arg->getOwnershipKind() == ValueOwnershipKind::Owned)
builder.emitDestroyOperation(loc, arg);
// Fatal error in case this JVP is called by the user.
// The error function takes no parameters and its result type is `Never`.
auto neverResultInfo = SILResultInfo(
module.getASTContext().getNeverType(), ResultConvention::Unowned);
auto fatalErrorJVPType = SILFunctionType::get(
/*genericSig*/ nullptr,
SILCoroutineKind::None, ParameterConvention::Direct_Unowned, {},
/*interfaceYields*/ {}, neverResultInfo,
/*interfaceErrorResults*/ None, getASTContext());
auto fnBuilder = SILOptFunctionBuilder(getTransform());
auto *fatalErrrorJvpFunc = fnBuilder.getOrCreateFunction(
loc, "_printJVPErrorAndExit", SILLinkage::PublicExternal,
fatalErrorJVPType, IsNotBare, IsNotTransparent, IsNotSerialized,
IsNotDynamic, ProfileCounter(), IsNotThunk);
auto *jvpErrorFuncRef =
builder.createFunctionRef(loc, fatalErrrorJvpFunc);
builder.createApply(loc, jvpErrorFuncRef, SubstitutionMap(), {});
LLVM_DEBUG(getADDebugStream() << "Generated empty JVP for "
<< original->getName() << ":\n" << *jvp);
// If the VJP doesn't exist, need to synthesize it.
if (!vjp) {
// Diagnose:
// - Functions with no return.
// - Functions with unsupported control flow.
if (diagnoseNoReturn(*this, original, invoker) ||
diagnoseUnsupportedControlFlow(*this, original, invoker))
return true;
vjp = createEmptyVJP(*this, original, attr, isDerivativeFnExported);
VJPEmitter emitter(*this, original, attr, vjp, invoker);
return false;
// Differentiation pass implementation
/// The automatic differentiation pass.
namespace {
// Module-level SIL transform; kept file-local by the anonymous namespace.
class Differentiation : public SILModuleTransform {
Differentiation() : SILModuleTransform() {}
// Pass entry point; the definition is out of line.
void run() override;
} // end anonymous namespace
/// Returns a thunk, paired with its interface substitution map, that adapts a
/// linear map of type `linearMapType` (a differential when `kind` is JVP, a
/// pullback when `kind` is VJP) from `actualIndices` to `desiredIndices`.
///
/// The thunk is obtained as a shared function; if it already has a body, it
/// is returned without being rebuilt.
std::pair<SILFunction *, SubstitutionMap>
SILFunction *parentThunk, CanSILFunctionType linearMapType,
CanSILFunctionType targetType, AutoDiffDerivativeFunctionKind kind,
SILAutoDiffIndices desiredIndices, SILAutoDiffIndices actualIndices) {
<< "Getting a subset parameters thunk for " << linearMapType
<< " from " << actualIndices << " to " << desiredIndices << '\n');
SubstitutionMap interfaceSubs;
GenericEnvironment *genericEnv = nullptr;
auto thunkType = buildThunkType(
parentThunk, linearMapType, targetType, genericEnv, interfaceSubs,
/*withoutActuallyEscaping*/ true,
// TODO(TF-685): Use more principled mangling for thunks.
std::string thunkName;
switch (kind) {
case AutoDiffDerivativeFunctionKind::JVP:
thunkName = "differential";
case AutoDiffDerivativeFunctionKind::VJP:
thunkName = "pullback";
Mangle::ASTMangler mangler;
auto fromInterfaceType =
auto toInterfaceType = targetType->mapTypeOutOfContext()->getCanonicalType();
CanType dynamicSelfType;
thunkName = "AD__" + mangler.mangleReabstractionThunkHelper(
thunkType, fromInterfaceType, toInterfaceType, dynamicSelfType,
module.getSwiftModule()) + "_" + desiredIndices.mangle() + "_" +
thunkName += "_index_subset_thunk";
auto loc = parentThunk->getLocation();
SILOptFunctionBuilder fb(getTransform());
auto *thunk = fb.getOrCreateSharedFunction(
loc, thunkName, thunkType, IsBare, IsTransparent, IsSerialized,
ProfileCounter(), IsThunk, IsNotDynamic);
// A non-empty thunk already has a body; reuse it.
if (!thunk->empty())
return {thunk, interfaceSubs};
auto *entry = thunk->createBasicBlock();
SILBuilder builder(entry);
// Get arguments.
SmallVector<SILValue, 4> arguments;
SmallVector<AllocStackInst *, 4> localAllocations;
// Build a `.zero` argument for the given `Differentiable`-conforming type.
auto buildZeroArgument = [&](SILType zeroSILType) {
auto zeroSILObjType = zeroSILType.getObjectType();
auto zeroType = zeroSILType.getASTType();
auto *swiftMod = getModule().getSwiftModule();
auto tangentSpace = zeroType->getAutoDiffAssociatedTangentSpace(
assert(tangentSpace && "No tangent space for this type");
switch (tangentSpace->getKind()) {
case VectorSpace::Kind::Vector: {
auto *buf = builder.createAllocStack(loc, zeroSILObjType);
emitZeroIntoBuffer(builder, zeroType, buf, loc);
if (zeroSILType.isAddress())
else {
auto *arg = builder.createLoad(loc, buf,
case VectorSpace::Kind::Tuple: {
"Unimplemented: Handle zero initialization for tuples");
case VectorSpace::Kind::Function:
"Unimplemented: Emit thunks for abstracting zero initialization");
// `actualIndices` and `desiredIndices` are with respect to the original
// function. However, the differential parameters and pullback results may
// already be w.r.t. a subset. We create a map between the original function's
// actual parameter indices and the linear map's actual indices.
// Example:
// Original: (T0, T1, T2) -> R
// Actual indices: 0, 2
// Original differential: (T0, T2) -> R
// Original pullback: R -> (T0, T2)
// Desired indices w.r.t. original: 2
// Desired indices w.r.t. linear map: 1
// Entries are initialized to `UINT_MAX`; entries for actual indices are
// overwritten in the loop below.
SmallVector<unsigned, 4> actualParamIndicesMap(
actualIndices.parameters->getCapacity(), UINT_MAX);
unsigned indexInBitVec = 0;
for (auto index : actualIndices.parameters->getIndices()) {
actualParamIndicesMap[index] = indexInBitVec;
auto mapOriginalParameterIndex = [&](unsigned index) -> unsigned {
auto mappedIndex = actualParamIndicesMap[index];
assert(mappedIndex < actualIndices.parameters->getCapacity());
return mappedIndex;
switch (kind) {
// Differential arguments are:
// - All indirect results, followed by:
// - An interleaving of:
// - Thunk arguments (when parameter index is in both desired and actual
// indices).
// - Zeros (when parameter is not in desired indices).
case AutoDiffDerivativeFunctionKind::JVP: {
// Forward all indirect results.
auto toArgIter = thunk->getArgumentsWithoutIndirectResults().begin();
auto useNextArgument = [&]() {
// Iterate over actual indices.
for (unsigned i : actualIndices.parameters->getIndices()) {
// If index is desired, use next argument.
if (desiredIndices.isWrtParameter(i)) {
// Otherwise, construct and use a zero argument.
else {
auto zeroSILType =
// Pullback arguments are:
// - An interleaving of:
// - Thunk indirect results (when parameter index is in both desired and
// actual indices).
// - Zeros (when parameter is not in desired indices).
// - All actual arguments.
case AutoDiffDerivativeFunctionKind::VJP: {
auto toIndirectResultsIter = thunk->getIndirectResults().begin();
auto useNextResult = [&]() {
// Iterate over actual indices.
for (unsigned i : actualIndices.parameters->getIndices()) {
auto resultInfo =
// Skip direct results. Only indirect results are relevant as arguments.
if (resultInfo.isFormalDirect())
// If index is desired, use next indirect result.
if (desiredIndices.isWrtParameter(i)) {
// Otherwise, construct and use an uninitialized indirect result.
auto *indirectResult =
builder.createAllocStack(loc, resultInfo.getSILStorageType());
// Forward all actual non-indirect-result arguments.
thunk->getArgumentsWithoutIndirectResults().end() - 1);
// Get the linear map thunk argument and apply it.
auto *linearMap = thunk->getArguments().back();
auto *ai = builder.createApply(
loc, linearMap, SubstitutionMap(), arguments, /*isNonThrowing*/ false);
// If differential thunk, deallocate local allocations and directly return
// `apply` result.
if (kind == AutoDiffDerivativeFunctionKind::JVP) {
for (auto *alloc : llvm::reverse(localAllocations))
builder.createDeallocStack(loc, alloc);
builder.createReturn(loc, ai);
return {thunk, interfaceSubs};
// If pullback thunk, return only the desired results and clean up the
// undesired results.
SmallVector<SILValue, 8> pullbackDirectResults;
extractAllElements(ai, builder, pullbackDirectResults);
SmallVector<SILValue, 8> allResults;
collectAllActualResultsInTypeOrder(ai, pullbackDirectResults, allResults);
SmallVector<SILValue, 8> results;
for (unsigned i : actualIndices.parameters->getIndices()) {
// If result is desired:
// - Do nothing if result is indirect.
// (It was already forwarded to the `apply` instruction).
// - Push it to `results` if result is direct.
auto result = allResults[mapOriginalParameterIndex(i)];
if (desiredIndices.isWrtParameter(i)) {
if (result->getType().isObject())
// Otherwise, cleanup the unused results.
else {
if (result->getType().isAddress())
builder.emitDestroyAddrAndFold(loc, result);
builder.emitDestroyValueOperation(loc, result);
// Deallocate local allocations and return final direct result.
for (auto *alloc : llvm::reverse(localAllocations))
builder.createDeallocStack(loc, alloc);
auto result = joinElements(results, builder, loc);
builder.createReturn(loc, result);
return {thunk, interfaceSubs};
/// Returns a thunk, paired with its interface substitution map, that adapts
/// the derivative function `derivativeFn` of `origFnOperand` from
/// `actualIndices` to `desiredIndices`.
///
/// The thunk applies the resolved derivative function, takes the last direct
/// result as the linear map, and wraps that linear map in a partial
/// application of a linear map thunk. The thunk is obtained as a shared
/// function; if it already has a body, it is returned without being rebuilt.
std::pair<SILFunction *, SubstitutionMap>
SILValue origFnOperand, SILValue derivativeFn,
AutoDiffDerivativeFunctionKind kind, SILAutoDiffIndices desiredIndices,
SILAutoDiffIndices actualIndices) {
<< "Getting a subset parameters thunk for derivative function "
<< derivativeFn << " of the original function " << origFnOperand
<< " from " << actualIndices << " to " << desiredIndices << '\n');
auto origFnType = origFnOperand->getType().castTo<SILFunctionType>();
auto &module = getModule();
auto lookupConformance = LookUpConformanceInModule(module.getSwiftModule());
// Compute target type for thunking.
auto derivativeFnType = derivativeFn->getType().castTo<SILFunctionType>();
auto targetType = origFnType->getAutoDiffDerivativeFunctionType(
desiredIndices.parameters, desiredIndices.source, kind, module.Types,
auto *caller = derivativeFn->getFunction();
if (targetType->hasArchetype()) {
auto substTargetType = caller->mapTypeIntoContext(
targetType = SILType::getPrimitiveObjectType(substTargetType)
assert(derivativeFnType->getNumParameters() == targetType->getNumParameters());
assert(derivativeFnType->getNumResults() == targetType->getNumResults());
// Build thunk type.
SubstitutionMap interfaceSubs;
GenericEnvironment *genericEnv = nullptr;
auto thunkType = buildThunkType(
derivativeFn->getFunction(), derivativeFnType, targetType, genericEnv,
interfaceSubs, /*withoutActuallyEscaping*/ false,
// FIXME: The logic for resolving `assocRef` does not reapply function
// conversions, which is problematic if `derivativeFn` is a `partial_apply`
// instruction.
StringRef origName;
if (auto *origFnRef =
peerThroughFunctionConversions<FunctionRefInst>(origFnOperand)) {
origName = origFnRef->getInitiallyReferencedFunction()->getName();
} else if (auto *origMethodInst =
peerThroughFunctionConversions<MethodInst>(origFnOperand)) {
origName = origMethodInst->getMember().getAnyFunctionRef()
assert(!origName.empty() && "Original function name could not be resolved");
// TODO(TF-685): Use more principled mangling for thunks.
std::string thunkName;
switch (kind) {
case AutoDiffDerivativeFunctionKind::JVP:
thunkName = "jvp";
case AutoDiffDerivativeFunctionKind::VJP:
thunkName = "vjp";
Mangle::ASTMangler mangler;
auto fromInterfaceType =
auto toInterfaceType = targetType->mapTypeOutOfContext()->getCanonicalType();
CanType dynamicSelfType;
thunkName = "AD__orig_" + origName.str() + "_" +
thunkType, fromInterfaceType, toInterfaceType, dynamicSelfType,
module.getSwiftModule()) + "_" + desiredIndices.mangle() + "_" +
thunkName += "_subset_parameters_thunk";
auto loc = origFnOperand.getLoc();
SILOptFunctionBuilder fb(getTransform());
auto *thunk = fb.getOrCreateSharedFunction(
loc, thunkName, thunkType, IsBare, IsTransparent, caller->isSerialized(),
ProfileCounter(), IsThunk, IsNotDynamic);
// A non-empty thunk already has a body; reuse it.
if (!thunk->empty())
return {thunk, interfaceSubs};
auto *entry = thunk->createBasicBlock();
SILBuilder builder(entry);
SubstitutionMap assocSubstMap;
if (auto *partialApply = dyn_cast<PartialApplyInst>(derivativeFn))
assocSubstMap = partialApply->getSubstitutionMap();
// FIXME: The logic for resolving `assocRef` does not reapply function
// conversions, which is problematic if `derivativeFn` is a `partial_apply`
// instruction.
// Resolve the derivative function inside the thunk from a function
// reference, a witness method, or a class method.
SILValue assocRef;
if (auto *derivativeFnRef =
peerThroughFunctionConversions<FunctionRefInst>(derivativeFn)) {
auto *assoc = derivativeFnRef->getReferencedFunctionOrNull();
assocRef = builder.createFunctionRef(loc, assoc);
} else if (auto *assocMethodInst =
peerThroughFunctionConversions<WitnessMethodInst>(derivativeFn)) {
assocRef = builder.createWitnessMethod(
loc, assocMethodInst->getLookupType(),
assocMethodInst->getConformance(), assocMethodInst->getMember(),
} else if (auto *assocMethodInst =
peerThroughFunctionConversions<ClassMethodInst>(derivativeFn)) {
auto classOperand = thunk->getArgumentsWithoutIndirectResults().back();
auto classOperandType = assocMethodInst->getOperand()->getType();
assert(classOperand->getType() == classOperandType);
assocRef = builder.createClassMethod(
loc, classOperand, assocMethodInst->getMember(),
assert(assocRef && "Expected derivative function to be resolved");
assocSubstMap = assocSubstMap.subst(thunk->getForwardingSubstitutionMap());
derivativeFnType = assocRef->getType().castTo<SILFunctionType>();
SmallVector<SILValue, 4> arguments;
arguments.append(thunk->getArguments().begin(), thunk->getArguments().end());
assert(arguments.size() == derivativeFnType->getNumParameters() +
auto *apply = builder.createApply(
loc, assocRef, assocSubstMap, arguments, /*isNonThrowing*/ false);
// Extract all direct results.
SmallVector<SILValue, 8> directResults;
extractAllElements(apply, builder, directResults);
// The last direct result is the linear map; the rest are rejoined into
// the original direct result.
auto originalDirectResults = ArrayRef<SILValue>(directResults).drop_back(1);
auto originalDirectResult =
joinElements(originalDirectResults, builder, apply->getLoc());
auto linearMap = directResults.back();
auto linearMapType = linearMap->getType().castTo<SILFunctionType>();
auto linearMapTargetType = targetType->getResults().back().getSILStorageType()
SILFunction *linearMapThunk;
SubstitutionMap linearMapSubs;
std::tie(linearMapThunk, linearMapSubs) =
thunk, linearMapType, linearMapTargetType, kind,
desiredIndices, actualIndices);
auto *linearMapThunkFRI = builder.createFunctionRef(loc, linearMapThunk);
auto *thunkedLinearMap = builder.createPartialApply(
loc, linearMapThunkFRI, linearMapSubs, {linearMap},
assert(origFnType->getResults().size() == 1);
// With a direct original result, return it joined with the thunked linear
// map; otherwise return only the thunked linear map.
if (origFnType->getResults().front().isFormalDirect()) {
auto result = joinElements(
{originalDirectResult, thunkedLinearMap}, builder, loc);
builder.createReturn(loc, result);
} else {
builder.createReturn(loc, thunkedLinearMap);
return {thunk, interfaceSubs};
/// Builds a `differentiable_function` value for the original function operand
/// of `dfi`, emitting JVP and VJP references and thunking them to the desired
/// parameter indices where the actual indices differ.
///
/// Applications of curry thunks are handled separately: a new curry thunk
/// with a `@differentiable` result is created and applied instead.
///
/// Returns a null value on failure.
SILValue ADContext::promoteToDifferentiableFunction(
DifferentiableFunctionInst *dfi, SILBuilder &builder, SILLocation loc,
DifferentiationInvoker invoker) {
auto origFnOperand = dfi->getOriginalFunction();
auto origFnTy = origFnOperand->getType().castTo<SILFunctionType>();
auto parameterIndices = dfi->getParameterIndices();
unsigned resultIndex = resultIndices[dfi];
// Handle curry thunk applications specially.
if (auto *ai = dyn_cast<ApplyInst>(origFnOperand)) {
if (auto *thunkRef = dyn_cast<FunctionRefInst>(ai->getCallee())) {
// Create a new curry thunk.
SILAutoDiffIndices desiredIndices(resultIndex, parameterIndices);
auto *thunk = thunkRef->getReferencedFunctionOrNull();
// TODO(TF-685): Use more principled mangling for thunks.
auto newThunkName = "AD__" + thunk->getName().str() +
"__differentiable_curry_thunk_" + desiredIndices.mangle();
auto thunkTy = thunk->getLoweredFunctionType();
auto thunkResult = thunkTy->getSingleResult();
if (auto resultFnTy = thunkResult.getType()->getAs<SILFunctionType>()) {
// Construct new curry thunk type with `@differentiable` result.
auto diffableResultFnTy = resultFnTy->getWithExtInfo(
auto newThunkResult = thunkResult.getWithType(diffableResultFnTy);
auto thunkType = SILFunctionType::get(
thunkTy->getGenericSignature(), thunkTy->getExtInfo(),
thunkTy->getCoroutineKind(), thunkTy->getCalleeConvention(),
thunkTy->getParameters(), {}, {newThunkResult}, {},
// Construct new curry thunk, returning a `@differentiable` function.
SILOptFunctionBuilder fb(transform);
auto *newThunk = fb.getOrCreateFunction(
loc, newThunkName,
getSpecializedLinkage(thunk, thunk->getLinkage()), thunkType,
thunk->isBare(), thunk->isTransparent(), thunk->isSerialized(),
thunk->isDynamicallyReplaceable(), ProfileCounter(),
// If new thunk is newly created: clone the old thunk body, wrap the
// returned function value with a `differentiable_function`
// instruction, and process the `differentiable_function` instruction.
if (newThunk->empty()) {
if (auto newThunkGenSig = thunkType->getGenericSignature())
BasicTypeSubstCloner cloner(thunk, newThunk);;
auto *retInst =
SILBuilder thunkBuilder(retInst);
auto *dfi = createDifferentiableFunction(thunkBuilder, loc,
resultIndices[dfi] = resultIndex;
thunkBuilder.createReturn(loc, dfi);
if (processDifferentiableFunctionInst(dfi))
return nullptr;
// Apply the new curry thunk.
auto *newThunkRef = builder.createFunctionRef(loc, newThunk);
SmallVector<SILValue, 8> newArgs;
SmallVector<SILValue, 8> newArgsToDestroy;
SmallVector<AllocStackInst *, 1> newBuffersToDealloc;
copyParameterArgumentsForApply(ai, newArgs, newArgsToDestroy,
auto *newApply = builder.createApply(
ai->getLoc(), newThunkRef, ai->getSubstitutionMap(), newArgs,
for (auto arg : newArgsToDestroy) {
if (arg->getType().isObject())
builder.emitDestroyValueOperation(loc, arg);
builder.emitDestroyAddr(loc, arg);
for (auto *alloc : newBuffersToDealloc)
builder.createDeallocStack(loc, alloc);
return newApply;
SILAutoDiffIndices desiredIndices(resultIndex, parameterIndices);
SmallVector<SILValue, 2> derivativeFns;
SmallVector<AllocStackInst *, 2> newBuffersToDealloc;
// Emit the JVP first and then the VJP.
for (auto derivativeFnKind : {AutoDiffDerivativeFunctionKind::JVP,
AutoDiffDerivativeFunctionKind::VJP}) {
auto derivativeFnAndIndices = emitDerivativeFunctionReference(
*this, builder, desiredIndices, derivativeFnKind, origFnOperand, invoker,
// Show an error at the operator, highlight the argument, and show a note
// at the definition site of the argument.
if (!derivativeFnAndIndices)
return nullptr;
auto derivativeFn = derivativeFnAndIndices->first;
// If desired indices are a subset of actual indices, create a "subset
// indices thunk" and destroy the emitted derivative function reference.
// - For JVPs: the thunked JVP returns a differential taking fewer
// parameters (using `.zero` for the dropped parameters).
// - For VJPs: the thunked VJP returns a pullback that drops the unused
// tangent values.
auto actualIndices = derivativeFnAndIndices->second;
// NOTE: `desiredIndices` may come from a partially-applied function and
// have smaller capacity than `actualIndices`. We expect this logic to go
// away when we support `@differentiable` partial apply.
// if (actualIndices != desiredIndices) { // TODO: Re-enable.
auto extendedDesiredIndices = desiredIndices.parameters->extendingCapacity(
getASTContext(), actualIndices.parameters->getCapacity());
if (actualIndices.source != desiredIndices.source ||
!actualIndices.parameters->equals(extendedDesiredIndices)) {
// Destroy the already emitted derivative function reference because it
// is no longer used.
builder.emitDestroyValueOperation(loc, derivativeFn);
// Check if underlying original function reference has been partially
// applied with arguments. If so, produce an error: parameter subset
// thunks do not yet support this case because partially applied arguments
// cannot be propagated to parameter subset thunks.
auto didPartiallyApplyArguments = [](SILValue original) {
while (auto *pai =
peerThroughFunctionConversions<PartialApplyInst>(original)) {
if (pai->getNumArguments() > 0)
return true;
original = pai->getCallee();
return false;
if (didPartiallyApplyArguments(origFnOperand)) {
origFnOperand, invoker,
return nullptr;
// Create the parameter subset thunk.
SILFunction *thunk;
SubstitutionMap interfaceSubs;
std::tie(thunk, interfaceSubs) =
origFnOperand, derivativeFn, derivativeFnKind, desiredIndices,
auto *thunkFRI = builder.createFunctionRef(loc, thunk);
// A thunk with a generic signature is partially applied with
// `interfaceSubs`; otherwise the function reference is used directly.
if (auto genSig =
thunk->getLoweredFunctionType()->getGenericSignature()) {
derivativeFn = builder.createPartialApply(
loc, thunkFRI, interfaceSubs, {},
} else {
derivativeFn = thunkFRI;
auto expectedDerivativeFnTy = origFnTy->getAutoDiffDerivativeFunctionType(
parameterIndices, resultIndex, derivativeFnKind, getTypeConverter(),
// If `derivativeFn` is `@convention(thin)` but is expected to be
// `@convention(thick)`, emit a `thin_to_thick` instruction.
if (expectedDerivativeFnTy->getRepresentation()
== SILFunctionTypeRepresentation::Thick &&
== SILFunctionTypeRepresentation::Thin) {
derivativeFn = builder.createThinToThickFunction(
loc, derivativeFn, SILType::getPrimitiveObjectType(expectedDerivativeFnTy));
// Deallocate temporary buffers used for creating derivative functions.
for (auto *buf : llvm::reverse(newBuffersToDealloc))
builder.createDeallocStack(loc, buf);
auto origFnCopy = builder.emitCopyValueOperation(loc, origFnOperand);
auto *newDFI = createDifferentiableFunction(
builder, loc, parameterIndices, origFnCopy,
std::make_pair(derivativeFns[0], derivativeFns[1]));
resultIndices[dfi] = resultIndex;
return newDFI;
/// Fold `differentiable_function_extract` users of the given
/// `differentiable_function` instruction, directly replacing them with
/// `differentiable_function` instruction operands. If the
/// `differentiable_function` instruction has no remaining uses, delete the
/// instruction itself after folding.
/// Folding can be disabled by the `SkipFoldingDifferentiableFunctionExtraction`
/// flag for SIL testing purposes.
// FIXME: This function is not correctly detecting the foldable pattern and
// needs to be rewritten.
void ADContext::foldDifferentiableFunctionExtraction(
DifferentiableFunctionInst *source) {
// Iterate through all `differentiable_function` instruction uses.
for (auto use : source->getUses()) {
auto *dfei = dyn_cast<DifferentiableFunctionExtractInst>(use->getUser());
// If user is not a `differentiable_function_extract` instruction, set flag
// to false.
if (!dfei)
// Fold original function extractors.
if (dfei->getExtractee() ==
NormalDifferentiableFunctionTypeComponent::Original) {
auto originalFnValue = source->getOriginalFunction();
// Fold derivative function extractors.
auto derivativeFnValue =
// If the `differentiable_function` instruction has no remaining uses, erase
// it.
if (isInstructionTriviallyDead(source)) {
SILBuilder builder(source);
// Destroy the JVP and VJP operands of `source`.
builder.emitDestroyAddrAndFold(source->getLoc(), source->getJVPFunction());
builder.emitDestroyAddrAndFold(source->getLoc(), source->getVJPFunction());
// Mark `source` as processed so that it won't be reprocessed after deletion.
/// Processes `dfi` by promoting its original function to a `@differentiable`
/// function value.
///
/// Returns false without promoting anything if `dfi` already has derivative
/// functions. Returns true if promotion fails, and false otherwise.
bool ADContext::processDifferentiableFunctionInst(
DifferentiableFunctionInst *dfi) {
auto &s = getADDebugStream() << "Processing DifferentiableFunctionInst:\n";
if (dfi->hasDerivativeFunctions())
return false;
SILFunction *parent = dfi->getFunction();
auto loc = dfi->getLoc();
SILBuilder builder(dfi);
auto differentiableFnValue =
promoteToDifferentiableFunction(dfi, builder, loc, dfi);
// Mark `dfi` as processed so that it won't be reprocessed after deletion.
if (!differentiableFnValue)
return true;
// Replace all uses of `dfi`.
// Destroy the original operand.
builder.emitDestroyValueOperation(loc, dfi->getOriginalFunction());
// If the promoted `@differentiable` function-typed value is a
// `differentiable_function` instruction, fold
// `differentiable_function_extract` instructions. If
// `differentiable_function_extract` folding is disabled, return.
if (!SkipFoldingDifferentiableFunctionExtraction)
if (auto *newDFI =
parent, SILAnalysis::InvalidationKind::FunctionBody);
return false;
/// AD pass entry.
///
/// Registers an invoker for every `[differentiable]` attribute in the module,
/// processes those attributes, and then processes the worklist of
/// `differentiable_function` instructions, accumulating any error into
/// `errorOccurred`.
void Differentiation::run() {
auto &module = *getModule();
auto &astCtx = module.getASTContext();
// A global differentiation context.
ADContext context(*this);
bool errorOccurred = false;
// Register all `@differentiable` attributes and `differentiable_function`
// instructions in the module that trigger differentiation.
for (SILFunction &f : module) {
for (auto *diffAttr : f.getDifferentiableAttrs()) {
DifferentiationInvoker invoker(diffAttr);
assert(!context.getInvokers().count(diffAttr) &&
"[differentiable] attribute already has an invoker");
context.getInvokers().insert({diffAttr, invoker});
for (SILBasicBlock &bb : f) {
for (SILInstruction &i : bb) {
if (auto *dfi = dyn_cast<DifferentiableFunctionInst>(&i))
else if (auto *lfi = dyn_cast<LinearFunctionInst>(&i)) {
errorOccurred = true;
// If nothing has triggered differentiation, there's nothing to do.
if (context.getInvokers().empty() &&
// AD relies on stdlib (the Swift module). If it's not imported, it's an
// internal error.
if (!astCtx.getStdlibModule()) {
// Process all `[differentiable]` attributes.
for (auto invokerPair : context.getInvokers()) {
auto *attr = invokerPair.first;
auto *original = attr->getOriginal();
auto invoker = invokerPair.second;
errorOccurred |=
context.processDifferentiableAttribute(original, attr, invoker);
// Iteratively process `differentiable_function` instruction worklist.
while (!context.getDifferentiableFunctionInsts().empty()) {
auto *dfi = context.getDifferentiableFunctionInsts().back();
// Skip instructions that have already been processed.
if (context.getProcessedDifferentiableFunctionInsts().count(dfi)) continue;
errorOccurred |= context.processDifferentiableFunctionInst(dfi);
// If any error occurred while processing `[differentiable]` attributes or
// `differentiable_function` instructions, clean up.
if (errorOccurred) {
LLVM_DEBUG(getADDebugStream() << "All differentiation finished\n");
// Pass creation
/// Allocates and returns a new instance of the `Differentiation` pass.
SILTransform *swift::createDifferentiation() {
return new Differentiation;