blob: 872ad885988960723d556b076605f218db2436db [file] [log] [blame]
/*
* Copyright (c) 2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <map>
#include <tuple>
#include "cm_fc_ld.h"
#include "DepGraph.h"
using namespace cm::patch;
DepNode *DepGraph::getDepNode(Binary *B, unsigned Off, bool Barrier = false) {
auto P = std::make_tuple(B, Off, Barrier);
auto I = NodeMap.find(P);
if (I != NodeMap.end())
return I->second;
Nodes.push_back(DepNode{B, Off, Barrier});
DepNode *N = &Nodes.back();
NodeMap[P] = N;
return N;
}
DepEdge *DepGraph::getDepEdge(DepNode *From, DepNode *To, bool FromDef) {
if (From == To) // No dependency on itself.
return nullptr;
auto P = std::make_pair(From, To);
auto I = EdgeMap.find(P);
if (I != EdgeMap.end())
return I->second;
// Add new edge.
Edges.push_back(DepEdge{From, To, FromDef});
DepEdge *E = &Edges.back();
EdgeMap[P] = E;
From->addToNode(To, FromDef);
To->addFromNode(From);
return E;
}
void DepGraph::build() {
// There's no need to build dependency graph for policy 0 & 2.
if (Policy == SWSB_POLICY_0 || Policy == SWSB_POLICY_2)
return;
std::map<unsigned, DepNode *> State;
auto requireDefSync = [](std::map<unsigned, DepNode *> &State) {
for (auto &KV : State)
if (KV.second->isDefByToken(KV.first))
return true;
return false;
};
auto requireUseSync = [](std::map<unsigned, DepNode *> &State) {
for (auto &KV : State)
if (KV.second->isUseByToken(KV.first))
return true;
return false;
};
unsigned Order = 0;
for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
Binary &B = *I;
if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
break;
B.setOrder(++Order);
if (B.getLinkType() == CM_FC_LINK_TYPE_CALLER) {
// For caller, add dependency edges to the sync point just before 'call'.
Relocation &Rel = *B.rel_begin(); // Assume there's just one 'call'.
Symbol *Sym = Rel.getSymbol();
Binary &Callee = *Sym->getBinary();
// Add a barrier node just before 'call' to resolve dependency.
DepNode *Node = getDepNode(&B, Rel.getOffset(), true);
for (auto RI = Callee.initreg_begin(),
RE = Callee.initreg_end(); RI != RE; ++RI) {
unsigned Reg = RI->getRegNo();
if (Reg == cm::patch::REG_NONE) {
/// Check if it's necessary to insert sync points.
bool reqDefSync = requireDefSync(State);
bool reqUseSync = requireUseSync(State);
if (reqDefSync || reqUseSync) {
Node->clearAccList();
if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
B.insertSyncPoint(Node);
// Clear state after barrier.
State.clear();
break;
}
}
// Only build token-based dependency.
auto SI = State.find(Reg);
if (SI == State.end())
continue;
auto From = SI->second;
// Skip access not by token.
if (!From->isDefByToken(Reg) && !From->isUseByToken(Reg))
continue;
// Skip RAR.
if (From->isUseOnly(Reg) && RI->isUse())
continue;
Node->appendRegAcc(&*RI);
auto E = getDepEdge(From, Node, From->isDef(Reg));
}
// Only update token-based dependency.
Node = getDepNode(&B, Rel.getOffset());
for (auto RI = Callee.finireg_begin(),
RE = Callee.finireg_end(); RI != RE; ++RI) {
// Skip use-only access without token associated.
if (RI->isDefNotByToken())
continue;
unsigned Reg = RI->getRegNo();
Node->appendRegAcc(&*RI);
State[Reg] = Node;
}
continue;
}
// Build RAW, WAW, and WAR from the current state to the first access list.
for (auto RI = B.initreg_begin(), RE = B.initreg_end(); RI != RE; ++RI) {
unsigned Reg = RI->getRegNo();
// Check for sync barrier.
if (Reg == cm::patch::REG_NONE) {
/// Check if it's necessary to insert sync points.
bool reqDefSync = requireDefSync(State);
bool reqUseSync = requireUseSync(State);
if (reqDefSync || reqUseSync) {
DepNode *Node = getDepNode(&B, RI->getOffset(), true);
if (reqDefSync) Node->setRdTokenMask(unsigned(-1));
if (reqUseSync) Node->setWrTokenMask(unsigned(-1));
B.insertSyncPoint(Node);
// Clean state after barrier.
State.clear();
break;
}
}
auto SI = State.find(Reg);
// Skip if that register has no dependency.
if (SI == State.end())
continue;
auto From = SI->second;
// Skip if the previous node is a use without token.
if (From->isUseNotByToken(Reg))
continue;
// Skip RAR.
if (From->isUseOnly(Reg) && RI->isUse())
continue;
auto To = getDepNode(&B, RI->getOffset());
To->appendRegAcc(&*RI);
auto E = getDepEdge(From, To, From->isDef(Reg));
}
// Update the current from the last access list.
for (auto RI = B.finireg_begin(), RE = B.finireg_end(); RI != RE; ++RI) {
// Skip use-only access without token associated.
if (RI->isUseNotByToken())
continue;
unsigned Reg = RI->getRegNo();
auto Node = getDepNode(&B, RI->getOffset());
Node->appendRegAcc(&*RI);
State[Reg] = Node;
}
}
}
void DepGraph::resolve() {
// In policy 2, do nothing.
if (Policy == SWSB_POLICY_2)
return;
if (Policy == SWSB_POLICY_0) {
// In policy 0, insert sync barrier before each non-callee kernels.
for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
Binary &B = *I;
if (B.getLinkType() == CM_FC_LINK_TYPE_CALLEE)
continue;
DepNode *Node = getDepNode(&B, 0, true);
Node->setRdTokenMask(unsigned(-1));
Node->setWrTokenMask(unsigned(-1));
B.insertSyncPoint(Node);
}
return;
}
auto getEncodedDistance = [](Binary *Bin, unsigned Off) {
uint64_t swsb_mask = 0x000000000000FF00;
uint64_t qw = *((uint64_t *)(Bin->getData() + Off));
unsigned swsb = unsigned((qw & swsb_mask) >> 8);
if ((swsb & 0xf0) == 0x00)
return swsb & 0x7;
if ((swsb & 0x80) == 0x80)
return (swsb & 0x70) >> 4;
return 0U;
};
auto calcDistance = [](Collection &C,
Binary *From, unsigned FOff,
Binary *To, unsigned TOff) {
// Always assuem there's no compact instruction.
if (From == To)
return (TOff - FOff) / 16;
unsigned D = 0;
bool FoundFrom = false;
for (auto BI = C.bin_begin(), BE = C.bin_end(); BI != BE; ++BI) {
Binary *B = &*BI;
if (From == B) {
FoundFrom = true;
D = B->getSize() - FOff;
continue;
}
if (!FoundFrom)
continue;
if (To != B) {
D += B->getSize();
continue;
}
if (To == B) {
D += TOff;
break;
}
}
return D / 16;
};
auto countBits = [](unsigned V) {
unsigned B;
for (B = 0; V; V >>= 1)
B += V & 1;
return B;
};
// Fix the dependency. Assume edges are processed in the linking/program
// order.
for (auto EI = Edges.begin(), EE = Edges.end(); EI != EE; ++EI) {
auto H = EI->getHead();
auto T = EI->getTail();
if (H->to_empty(EI->isHeadDef()) || T->from_empty())
continue;
if (!T->isBarrier())
T->setDistance(getEncodedDistance(T->getBinary(), T->getOffset()));
for (auto RI = T->acc_begin(), RE = T->acc_end(); RI != RE; ++RI) {
RegAccess *To = *RI;
unsigned Reg = To->getRegNo();
for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI) {
auto Def = *DI;
for (auto AI = Def->acc_begin(), AE = Def->acc_end(); AI != AE; ++AI) {
RegAccess *From = *AI;
if (From->getRegNo() != Reg)
continue;
bool HasToken;
unsigned Tok;
std::tie(HasToken, Tok) = From->getToken();
if (From->isDef()) {
// RAW or WAW
if (HasToken)
T->mergeWrTokenMask(1 << Tok);
else
T->updateDistance(calcDistance(C,
Def->getBinary(), Def->getOffset(),
T->getBinary(), T->getOffset()));
} else if (To->isDef()) {
// WAR
if (HasToken)
T->mergeRdTokenMask(1 << Tok);
}
}
}
}
// Revise src token as syncing on def token implies syncing on src token.
if (T->getRdTokenMask() != unsigned(-1)) {
unsigned Mask = T->getRdTokenMask();
Mask &= ~T->getWrTokenMask();
T->setRdTokenMask(Mask);
}
// Update SWSB info and check if we need additional sync barrier.
if (T->getDistance() > 7) // Clear distance if it's greater than 7.
T->setDistance(0);
unsigned NumRdToks = countBits(T->getRdTokenMask());
unsigned NumWrToks = countBits(T->getWrTokenMask());
if ((NumRdToks + NumWrToks) > 0) {
// Already has SBID associated. Need to insert additional sync barrier.
DepNode *Node = T;
if (!T->isBarrier())
Node = getDepNode(T->getBinary(), T->getOffset(), true);
// Rd tokens.
if (NumRdToks == 1)
Node->setRdTokenMask(T->getRdTokenMask());
else if (NumRdToks != 0)
Node->setRdTokenMask(unsigned(-1));
// Wr tokens.
if (NumWrToks == 1)
Node->setWrTokenMask(T->getWrTokenMask());
else if (NumWrToks != 0)
Node->setWrTokenMask(unsigned(-1));
T->getBinary()->insertSyncPoint(Node);
}
for (auto DI = T->from_begin(), DE = T->from_end(); DI != DE; ++DI)
(*DI)->clearToNodes(EI->isHeadDef());
T->clearFromNodes();
}
}