author     Samuel Antao <sfantao@us.ibm.com>  2016-07-15 23:13:27 +0000
committer  Samuel Antao <sfantao@us.ibm.com>  2016-07-15 23:13:27 +0000
commit     951912906e6bfdf8b1938e43601e920f85736fe0 (patch)
tree       4851715f7e98be1161444232eeb89e26b89a91d3  /lib/Driver/Action.cpp
parent     51856a5426575f4c87c5233625cfc8123e4300f3 (diff)
[CUDA][OpenMP] Create generic offload action
Summary:
This patch replaces the CUDA-specific action by a generic offload action. The offload action may have multiple dependences, classified as "host" and "device". The way this generic offloading action is used is very similar to what is done today by the CUDA implementation: it is used to set a specific toolchain and architecture on its dependences during the generation of jobs.

This patch also proposes propagating the offloading information through the action graph so that this information can easily be retrieved at any time during the generation of commands. This allows, e.g., the "clang" tool to evaluate whether CUDA should be supported for the device or host, and ptxas to easily retrieve the target architecture.

This is an example of what the action graph looks like (compilation of a single CUDA file for two GPU architectures):

```
0: input, "cudatests.cu", cuda, (host-cuda)
1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
2: compiler, {1}, ir, (host-cuda)
3: input, "cudatests.cu", cuda, (device-cuda, sm_35)
4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_35)
5: compiler, {4}, ir, (device-cuda, sm_35)
6: backend, {5}, assembler, (device-cuda, sm_35)
7: assembler, {6}, object, (device-cuda, sm_35)
8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {7}, object
9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_35)" {6}, assembler
10: input, "cudatests.cu", cuda, (device-cuda, sm_37)
11: preprocessor, {10}, cuda-cpp-output, (device-cuda, sm_37)
12: compiler, {11}, ir, (device-cuda, sm_37)
13: backend, {12}, assembler, (device-cuda, sm_37)
14: assembler, {13}, object, (device-cuda, sm_37)
15: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {14}, object
16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_37)" {13}, assembler
17: linker, {8, 9, 15, 16}, cuda-fatbin, (device-cuda)
18: offload, "host-cuda (powerpc64le-unknown-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {17}, ir
19: backend, {18}, assembler
20: assembler, {19}, object
21: input, "cuda", object
22: input, "cudart", object
23: linker, {20, 21, 22}, image
```

The changes in this patch pass the existing regression tests (the existing functionality is preserved), and the resulting binaries execute correctly on a Power8+K40 machine.

Reviewers: echristo, hfinkel, jlebar, ABataev, tra

Subscribers: guansong, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin

Differential Revision: https://reviews.llvm.org/D18171

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@275645 91177308-0d34-0410-b5e6-96231b3b80d8
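For orientation, the snippet below is a minimal sketch, not part of this patch, of how a driver caller might tie one host action and one CUDA device action together with the dependence helpers introduced here. The function name `wrapCudaOffload` and all of its parameters are hypothetical stand-ins for what `Driver::BuildActions` would supply, and the availability of `Compilation::MakeAction` is an assumption:

```
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/ToolChain.h"

using namespace clang::driver;

// Hypothetical helper (not part of this patch): wrap one host action and one
// CUDA device action into the new OffloadAction. The actions and toolchains
// are assumed to be created by the surrounding driver logic.
Action *wrapCudaOffload(Compilation &C, Action *HostAction,
                        Action *DeviceAction, const ToolChain &HostTC,
                        const ToolChain &CudaTC, const char *GpuArch) {
  // Register the device dependence with its toolchain, bound architecture
  // (e.g. "sm_35"), and offload kind.
  OffloadAction::DeviceDependences DDeps;
  DDeps.add(*DeviceAction, CudaTC, GpuArch, Action::OFK_Cuda);

  // The host dependence gathers the offload kinds of all device dependences.
  OffloadAction::HostDependence HDep(*HostAction, HostTC,
                                     /*BoundArch=*/nullptr, DDeps);

  // The resulting action records both toolchains and propagates the offload
  // kind and bound architecture down each dependence chain.
  return C.MakeAction<OffloadAction>(HDep, DDeps);
}
```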
Diffstat (limited to 'lib/Driver/Action.cpp')
-rw-r--r--  lib/Driver/Action.cpp  217
1 file changed, 208 insertions, 9 deletions
diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp
index 7982f51f07..a98b5c1bba 100644
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Driver/Action.h"
+#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
@@ -21,8 +22,8 @@ const char *Action::getClassName(ActionClass AC) {
switch (AC) {
case InputClass: return "input";
case BindArchClass: return "bind-arch";
- case CudaDeviceClass: return "cuda-device";
- case CudaHostClass: return "cuda-host";
+ case OffloadClass:
+ return "offload";
case PreprocessJobClass: return "preprocessor";
case PrecompileJobClass: return "precompiler";
case AnalyzeJobClass: return "analyzer";
@@ -40,6 +41,82 @@ const char *Action::getClassName(ActionClass AC) {
llvm_unreachable("invalid class");
}
+void Action::propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch) {
+ // Offload actions set their own kinds on their dependences.
+ if (Kind == OffloadClass)
+ return;
+
+ assert((OffloadingDeviceKind == OKind || OffloadingDeviceKind == OFK_None) &&
+ "Setting device kind to a different device??");
+ assert(!ActiveOffloadKindMask && "Setting a device kind in a host action??");
+ OffloadingDeviceKind = OKind;
+ OffloadingArch = OArch;
+
+ for (auto *A : Inputs)
+ A->propagateDeviceOffloadInfo(OffloadingDeviceKind, OArch);
+}
+
+void Action::propagateHostOffloadInfo(unsigned OKinds, const char *OArch) {
+ // Offload actions set their own kinds on their dependences.
+ if (Kind == OffloadClass)
+ return;
+
+ assert(OffloadingDeviceKind == OFK_None &&
+ "Setting a host kind in a device action.");
+ ActiveOffloadKindMask |= OKinds;
+ OffloadingArch = OArch;
+
+ for (auto *A : Inputs)
+ A->propagateHostOffloadInfo(ActiveOffloadKindMask, OArch);
+}
+
+void Action::propagateOffloadInfo(const Action *A) {
+ if (unsigned HK = A->getOffloadingHostActiveKinds())
+ propagateHostOffloadInfo(HK, A->getOffloadingArch());
+ else
+ propagateDeviceOffloadInfo(A->getOffloadingDeviceKind(),
+ A->getOffloadingArch());
+}
+
+std::string Action::getOffloadingKindPrefix() const {
+ switch (OffloadingDeviceKind) {
+ case OFK_None:
+ break;
+ case OFK_Host:
+ llvm_unreachable("Host kind is not an offloading device kind.");
+ break;
+ case OFK_Cuda:
+ return "device-cuda";
+
+ // TODO: Add other programming models here.
+ }
+
+ if (!ActiveOffloadKindMask)
+ return "";
+
+ std::string Res("host");
+ if (ActiveOffloadKindMask & OFK_Cuda)
+ Res += "-cuda";
+
+ // TODO: Add other programming models here.
+
+ return Res;
+}
+
+std::string
+Action::getOffloadingFileNamePrefix(StringRef NormalizedTriple) const {
+ // A file prefix is only generated for device actions and consists of the
+ // offload kind and triple.
+ if (!OffloadingDeviceKind)
+ return "";
+
+ std::string Res("-");
+ Res += getOffloadingKindPrefix();
+ Res += "-";
+ Res += NormalizedTriple;
+ return Res;
+}
+
void InputAction::anchor() {}
InputAction::InputAction(const Arg &_Input, types::ID _Type)
@@ -51,16 +128,138 @@ void BindArchAction::anchor() {}
BindArchAction::BindArchAction(Action *Input, const char *_ArchName)
: Action(BindArchClass, Input), ArchName(_ArchName) {}
-void CudaDeviceAction::anchor() {}
+void OffloadAction::anchor() {}
+
+OffloadAction::OffloadAction(const HostDependence &HDep)
+ : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()) {
+ OffloadingArch = HDep.getBoundArch();
+ ActiveOffloadKindMask = HDep.getOffloadKinds();
+ HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
+ HDep.getBoundArch());
+}
+
+OffloadAction::OffloadAction(const DeviceDependences &DDeps, types::ID Ty)
+ : Action(OffloadClass, DDeps.getActions(), Ty),
+ DevToolChains(DDeps.getToolChains()) {
+ auto &OKinds = DDeps.getOffloadKinds();
+ auto &BArchs = DDeps.getBoundArchs();
+
+ // If all inputs agree on the same kind, use it also for this action.
+ if (llvm::all_of(OKinds, [&](OffloadKind K) { return K == OKinds.front(); }))
+ OffloadingDeviceKind = OKinds.front();
+
+ // If we have a single dependency, inherit the architecture from it.
+ if (OKinds.size() == 1)
+ OffloadingArch = BArchs.front();
+
+ // Propagate info to the dependencies.
+ for (unsigned i = 0, e = getInputs().size(); i != e; ++i)
+ getInputs()[i]->propagateDeviceOffloadInfo(OKinds[i], BArchs[i]);
+}
+
+OffloadAction::OffloadAction(const HostDependence &HDep,
+ const DeviceDependences &DDeps)
+ : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()),
+ DevToolChains(DDeps.getToolChains()) {
+ // We use the kinds of the host dependence for this action.
+ OffloadingArch = HDep.getBoundArch();
+ ActiveOffloadKindMask = HDep.getOffloadKinds();
+ HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(),
+ HDep.getBoundArch());
+
+ // Add device inputs and propagate info to the device actions. Do work only if
+ // we have dependencies.
+ for (unsigned i = 0, e = DDeps.getActions().size(); i != e; ++i)
+ if (auto *A = DDeps.getActions()[i]) {
+ getInputs().push_back(A);
+ A->propagateDeviceOffloadInfo(DDeps.getOffloadKinds()[i],
+ DDeps.getBoundArchs()[i]);
+ }
+}
+
+void OffloadAction::doOnHostDependence(const OffloadActionWorkTy &Work) const {
+ if (!HostTC)
+ return;
+ assert(!getInputs().empty() && "No dependencies for offload action??");
+ auto *A = getInputs().front();
+ Work(A, HostTC, A->getOffloadingArch());
+}
-CudaDeviceAction::CudaDeviceAction(Action *Input, clang::CudaArch Arch,
- bool AtTopLevel)
- : Action(CudaDeviceClass, Input), GpuArch(Arch), AtTopLevel(AtTopLevel) {}
+void OffloadAction::doOnEachDeviceDependence(
+ const OffloadActionWorkTy &Work) const {
+ auto I = getInputs().begin();
+ auto E = getInputs().end();
+ if (I == E)
+ return;
+
+ // We expect to have the same number of input dependences and device tool
+ // chains, except if we also have a host dependence. In that case we have one
+ // more dependence than we have device tool chains.
+ assert(getInputs().size() == DevToolChains.size() + (HostTC ? 1 : 0) &&
+ "Sizes of action dependences and toolchains are not consistent!");
+
+ // Skip host action
+ if (HostTC)
+ ++I;
+
+ auto TI = DevToolChains.begin();
+ for (; I != E; ++I, ++TI)
+ Work(*I, *TI, (*I)->getOffloadingArch());
+}
+
+void OffloadAction::doOnEachDependence(const OffloadActionWorkTy &Work) const {
+ doOnHostDependence(Work);
+ doOnEachDeviceDependence(Work);
+}
+
+void OffloadAction::doOnEachDependence(bool IsHostDependence,
+ const OffloadActionWorkTy &Work) const {
+ if (IsHostDependence)
+ doOnHostDependence(Work);
+ else
+ doOnEachDeviceDependence(Work);
+}
-void CudaHostAction::anchor() {}
+bool OffloadAction::hasHostDependence() const { return HostTC != nullptr; }
-CudaHostAction::CudaHostAction(Action *Input, const ActionList &DeviceActions)
- : Action(CudaHostClass, Input), DeviceActions(DeviceActions) {}
+Action *OffloadAction::getHostDependence() const {
+ assert(hasHostDependence() && "Host dependence does not exist!");
+ assert(!getInputs().empty() && "No dependencies for offload action??");
+ return HostTC ? getInputs().front() : nullptr;
+}
+
+bool OffloadAction::hasSingleDeviceDependence(
+ bool DoNotConsiderHostActions) const {
+ if (DoNotConsiderHostActions)
+ return getInputs().size() == (HostTC ? 2 : 1);
+ return !HostTC && getInputs().size() == 1;
+}
+
+Action *
+OffloadAction::getSingleDeviceDependence(bool DoNotConsiderHostActions) const {
+ assert(hasSingleDeviceDependence(DoNotConsiderHostActions) &&
+ "Single device dependence does not exist!");
+ // The previous assert ensures the number of entries in getInputs() is
+ // consistent with what we are doing here.
+ return HostTC ? getInputs()[1] : getInputs().front();
+}
+
+void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC,
+ const char *BoundArch,
+ OffloadKind OKind) {
+ DeviceActions.push_back(&A);
+ DeviceToolChains.push_back(&TC);
+ DeviceBoundArchs.push_back(BoundArch);
+ DeviceOffloadKinds.push_back(OKind);
+}
+
+OffloadAction::HostDependence::HostDependence(Action &A, const ToolChain &TC,
+ const char *BoundArch,
+ const DeviceDependences &DDeps)
+ : HostAction(A), HostToolChain(TC), HostBoundArch(BoundArch) {
+ for (auto K : DDeps.getOffloadKinds())
+ HostOffloadKinds |= K;
+}
void JobAction::anchor() {}
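As a usage note, the `doOnEachDependence` helpers added above are intended for consumers such as job construction, which need to visit each dependence together with its toolchain and bound architecture. The following is a minimal sketch under the assumption that `OffloadActionWorkTy` is a callable over `(Action *, const ToolChain *, const char *)`, as the `Work(...)` calls in this patch suggest; `dumpOffloadDependences` is a hypothetical helper, not part of the patch:

```
#include "clang/Driver/Action.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang::driver;

// Hypothetical debugging helper (not part of this patch): print each
// dependence of an offload action together with the toolchain triple it will
// be built for and, if present, the architecture it is bound to.
static void dumpOffloadDependences(const OffloadAction &OA) {
  OA.doOnEachDependence(
      [](Action *DepA, const ToolChain *TC, const char *BoundArch) {
        llvm::errs() << DepA->getClassName() << " -> " << TC->getTriple().str();
        if (BoundArch)
          llvm::errs() << ":" << BoundArch;
        llvm::errs() << "\n";
      });
}
```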