summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
author Justin Lebar <jlebar@google.com> 2016-07-06 21:21:39 +0000
committer Justin Lebar <jlebar@google.com> 2016-07-06 21:21:39 +0000
commit fd3a1ac0e29c9bffc20391cf10ba006fff501b12 (patch)
tree ed9889634f5784cef47eb86b93eb88bcc90013cd /lib
parent ebd3497c508a9d237f4eaa459232f3e444ee0e86 (diff)
[CUDA] Add utility functions for dealing with CUDA versions / architectures.
Summary:
Currently our handling of CUDA architectures is scattered all around clang. This patch centralizes it.

A key advantage of this centralization is that you can now write a C++ switch on e.g. CudaArch and get a compile error if you don't handle one of the enum values.

Reviewers: tra
Subscribers: cfe-commits
Differential Revision: http://reviews.llvm.org/D21867
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274681 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Basic/CMakeLists.txt 1
-rw-r--r-- lib/Basic/Cuda.cpp 165
-rw-r--r-- lib/Basic/Targets.cpp 68
-rw-r--r-- lib/Driver/Action.cpp 36
-rw-r--r-- lib/Driver/Driver.cpp 29
-rw-r--r-- lib/Driver/Tools.cpp 7
6 files changed, 207 insertions, 99 deletions
diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt
index 7524b8cd12..ad460d4965 100644
--- a/lib/Basic/CMakeLists.txt
+++ b/lib/Basic/CMakeLists.txt
@@ -66,6 +66,7 @@ add_clang_library(clangBasic
Attributes.cpp
Builtins.cpp
CharInfo.cpp
+ Cuda.cpp
Diagnostic.cpp
DiagnosticIDs.cpp
DiagnosticOptions.cpp
diff --git a/lib/Basic/Cuda.cpp b/lib/Basic/Cuda.cpp
new file mode 100644
index 0000000000..b4a60eb31a
--- /dev/null
+++ b/lib/Basic/Cuda.cpp
@@ -0,0 +1,165 @@
+#include "clang/Basic/Cuda.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+
+/// Returns the printable spelling of the CUDA toolkit version \p V.
+///
+/// \returns "7.0", "7.5" or "8.0" for the corresponding enumerator, and
+/// "unknown" for CudaVersion::UNKNOWN. The returned pointer refers to a
+/// string literal.
+const char *CudaVersionToString(CudaVersion V) {
+  // Deliberately no `default:` label: a CudaVersion enumerator that is added
+  // later but not handled here is then flagged by the compiler.
+  switch (V) {
+  case CudaVersion::UNKNOWN:
+    return "unknown";
+  case CudaVersion::CUDA_70:
+    return "7.0";
+  case CudaVersion::CUDA_75:
+    return "7.5";
+  case CudaVersion::CUDA_80:
+    return "8.0";
+  }
+  // Every enumerator returns above, so only a value outside the enum can get
+  // here. Without this, control would flow off the end of a non-void function.
+  llvm_unreachable("invalid enum");
+}
+
+/// Returns the "sm_XX" spelling of the GPU architecture \p A.
+///
+/// \returns the architecture name (e.g. "sm_35"), or "unknown" for
+/// CudaArch::UNKNOWN. The returned pointer refers to a string literal.
+const char *CudaArchToString(CudaArch A) {
+  // Deliberately no `default:` label: a CudaArch enumerator that is added
+  // later but not handled here is then flagged by the compiler.
+  switch (A) {
+  case CudaArch::UNKNOWN:
+    return "unknown";
+  case CudaArch::SM_20:
+    return "sm_20";
+  case CudaArch::SM_21:
+    return "sm_21";
+  case CudaArch::SM_30:
+    return "sm_30";
+  case CudaArch::SM_32:
+    return "sm_32";
+  case CudaArch::SM_35:
+    return "sm_35";
+  case CudaArch::SM_37:
+    return "sm_37";
+  case CudaArch::SM_50:
+    return "sm_50";
+  case CudaArch::SM_52:
+    return "sm_52";
+  case CudaArch::SM_53:
+    return "sm_53";
+  case CudaArch::SM_60:
+    return "sm_60";
+  case CudaArch::SM_61:
+    return "sm_61";
+  case CudaArch::SM_62:
+    return "sm_62";
+  }
+  // Every enumerator returns above, so only a value outside the enum can get
+  // here. Without this, control would flow off the end of a non-void function.
+  llvm_unreachable("invalid enum");
+}
+
+/// Parses a GPU architecture name of the form "sm_XX" into a CudaArch.
+///
+/// The comparison is exact; any string that is not one of the names in the
+/// table below yields CudaArch::UNKNOWN.
+CudaArch StringToCudaArch(llvm::StringRef S) {
+  // Name -> enumerator table, scanned linearly for an exact match.
+  static const struct {
+    const char *Name;
+    CudaArch Arch;
+  } KnownArchs[] = {
+      {"sm_20", CudaArch::SM_20}, {"sm_21", CudaArch::SM_21},
+      {"sm_30", CudaArch::SM_30}, {"sm_32", CudaArch::SM_32},
+      {"sm_35", CudaArch::SM_35}, {"sm_37", CudaArch::SM_37},
+      {"sm_50", CudaArch::SM_50}, {"sm_52", CudaArch::SM_52},
+      {"sm_53", CudaArch::SM_53}, {"sm_60", CudaArch::SM_60},
+      {"sm_61", CudaArch::SM_61}, {"sm_62", CudaArch::SM_62},
+  };
+
+  for (const auto &Entry : KnownArchs)
+    if (S == Entry.Name)
+      return Entry.Arch;
+
+  return CudaArch::UNKNOWN;
+}
+
+/// Returns the "compute_XX" spelling of the virtual architecture \p A.
+///
+/// \returns the virtual architecture name (e.g. "compute_35"), or "unknown"
+/// for CudaVirtualArch::UNKNOWN. The returned pointer refers to a string
+/// literal.
+const char *CudaVirtualArchToString(CudaVirtualArch A) {
+  // Deliberately no `default:` label: a CudaVirtualArch enumerator that is
+  // added later but not handled here is then flagged by the compiler.
+  switch (A) {
+  case CudaVirtualArch::UNKNOWN:
+    return "unknown";
+  case CudaVirtualArch::COMPUTE_20:
+    return "compute_20";
+  case CudaVirtualArch::COMPUTE_30:
+    return "compute_30";
+  case CudaVirtualArch::COMPUTE_32:
+    return "compute_32";
+  case CudaVirtualArch::COMPUTE_35:
+    return "compute_35";
+  case CudaVirtualArch::COMPUTE_37:
+    return "compute_37";
+  case CudaVirtualArch::COMPUTE_50:
+    return "compute_50";
+  case CudaVirtualArch::COMPUTE_52:
+    return "compute_52";
+  case CudaVirtualArch::COMPUTE_53:
+    return "compute_53";
+  case CudaVirtualArch::COMPUTE_60:
+    return "compute_60";
+  case CudaVirtualArch::COMPUTE_61:
+    return "compute_61";
+  case CudaVirtualArch::COMPUTE_62:
+    return "compute_62";
+  }
+  // Every enumerator returns above, so only a value outside the enum can get
+  // here. Without this, control would flow off the end of a non-void function.
+  llvm_unreachable("invalid enum");
+}
+
+/// Parses a virtual architecture name of the form "compute_XX" into a
+/// CudaVirtualArch.
+///
+/// The comparison is exact; any string that is not one of the names in the
+/// table below yields CudaVirtualArch::UNKNOWN.
+CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
+  // Name -> enumerator table, scanned linearly for an exact match.
+  static const struct {
+    const char *Name;
+    CudaVirtualArch Arch;
+  } KnownVirtualArchs[] = {
+      {"compute_20", CudaVirtualArch::COMPUTE_20},
+      {"compute_30", CudaVirtualArch::COMPUTE_30},
+      {"compute_32", CudaVirtualArch::COMPUTE_32},
+      {"compute_35", CudaVirtualArch::COMPUTE_35},
+      {"compute_37", CudaVirtualArch::COMPUTE_37},
+      {"compute_50", CudaVirtualArch::COMPUTE_50},
+      {"compute_52", CudaVirtualArch::COMPUTE_52},
+      {"compute_53", CudaVirtualArch::COMPUTE_53},
+      {"compute_60", CudaVirtualArch::COMPUTE_60},
+      {"compute_61", CudaVirtualArch::COMPUTE_61},
+      {"compute_62", CudaVirtualArch::COMPUTE_62},
+  };
+
+  for (const auto &Entry : KnownVirtualArchs)
+    if (S == Entry.Name)
+      return Entry.Arch;
+
+  return CudaVirtualArch::UNKNOWN;
+}
+
+/// Maps the GPU architecture \p A to its virtual (compute_XX) architecture.
+///
+/// SM_20 and SM_21 both map to COMPUTE_20; every other SM_XX maps to the
+/// COMPUTE_XX with the same number. CudaArch::UNKNOWN maps to
+/// CudaVirtualArch::UNKNOWN.
+CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
+  // Deliberately no `default:` label: a CudaArch enumerator that is added
+  // later but not handled here is then flagged by the compiler.
+  switch (A) {
+  case CudaArch::UNKNOWN:
+    return CudaVirtualArch::UNKNOWN;
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+    return CudaVirtualArch::COMPUTE_20;
+  case CudaArch::SM_30:
+    return CudaVirtualArch::COMPUTE_30;
+  case CudaArch::SM_32:
+    return CudaVirtualArch::COMPUTE_32;
+  case CudaArch::SM_35:
+    return CudaVirtualArch::COMPUTE_35;
+  case CudaArch::SM_37:
+    return CudaVirtualArch::COMPUTE_37;
+  case CudaArch::SM_50:
+    return CudaVirtualArch::COMPUTE_50;
+  case CudaArch::SM_52:
+    return CudaVirtualArch::COMPUTE_52;
+  case CudaArch::SM_53:
+    return CudaVirtualArch::COMPUTE_53;
+  case CudaArch::SM_60:
+    return CudaVirtualArch::COMPUTE_60;
+  case CudaArch::SM_61:
+    return CudaVirtualArch::COMPUTE_61;
+  case CudaArch::SM_62:
+    return CudaVirtualArch::COMPUTE_62;
+  }
+  // Every enumerator returns above, so only a value outside the enum can get
+  // here. Without this, control would flow off the end of a non-void function.
+  llvm_unreachable("invalid enum");
+}
+
+/// Returns the CUDA version this function associates with GPU architecture
+/// \p A as its minimum.
+///
+/// SM_20 through SM_53 yield CUDA_70; SM_60, SM_61 and SM_62 yield CUDA_80.
+/// CudaArch::UNKNOWN yields CudaVersion::UNKNOWN.
+CudaVersion MinVersionForCudaArch(CudaArch A) {
+  // Deliberately no `default:` label: a CudaArch enumerator that is added
+  // later but not handled here is then flagged by the compiler.
+  switch (A) {
+  case CudaArch::UNKNOWN:
+    return CudaVersion::UNKNOWN;
+  case CudaArch::SM_20:
+  case CudaArch::SM_21:
+  case CudaArch::SM_30:
+  case CudaArch::SM_32:
+  case CudaArch::SM_35:
+  case CudaArch::SM_37:
+  case CudaArch::SM_50:
+  case CudaArch::SM_52:
+  case CudaArch::SM_53:
+    return CudaVersion::CUDA_70;
+  case CudaArch::SM_60:
+  case CudaArch::SM_61:
+  case CudaArch::SM_62:
+    return CudaVersion::CUDA_80;
+  }
+  // Every enumerator returns above, so only a value outside the enum can get
+  // here. Without this, control would flow off the end of a non-void function.
+  llvm_unreachable("invalid enum");
+}
+
+} // namespace clang
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 9e812ef80c..38e77f0d1a 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -12,12 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/Version.h"
#include "llvm/ADT/APFloat.h"
@@ -1694,23 +1695,7 @@ static const unsigned NVPTXAddrSpaceMap[] = {
class NVPTXTargetInfo : public TargetInfo {
static const char *const GCCRegNames[];
static const Builtin::Info BuiltinInfo[];
-
- // The GPU profiles supported by the NVPTX backend
- enum GPUKind {
- GK_NONE,
- GK_SM20,
- GK_SM21,
- GK_SM30,
- GK_SM32,
- GK_SM35,
- GK_SM37,
- GK_SM50,
- GK_SM52,
- GK_SM53,
- GK_SM60,
- GK_SM61,
- GK_SM62,
- } GPU;
+ CudaArch GPU;
public:
NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
@@ -1723,8 +1708,7 @@ public:
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
- // Set the default GPU to sm20
- GPU = GK_SM20;
+ GPU = CudaArch::SM_20;
// If possible, get a TargetInfo for our host triple, so we can match its
// types.
@@ -1793,32 +1777,32 @@ public:
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
switch (GPU) {
- case GK_NONE:
+ case CudaArch::UNKNOWN:
assert(false && "No GPU arch when compiling CUDA device code.");
return "";
- case GK_SM20:
+ case CudaArch::SM_20:
return "200";
- case GK_SM21:
+ case CudaArch::SM_21:
return "210";
- case GK_SM30:
+ case CudaArch::SM_30:
return "300";
- case GK_SM32:
+ case CudaArch::SM_32:
return "320";
- case GK_SM35:
+ case CudaArch::SM_35:
return "350";
- case GK_SM37:
+ case CudaArch::SM_37:
return "370";
- case GK_SM50:
+ case CudaArch::SM_50:
return "500";
- case GK_SM52:
+ case CudaArch::SM_52:
return "520";
- case GK_SM53:
+ case CudaArch::SM_53:
return "530";
- case GK_SM60:
+ case CudaArch::SM_60:
return "600";
- case GK_SM61:
+ case CudaArch::SM_61:
return "610";
- case GK_SM62:
+ case CudaArch::SM_62:
return "620";
}
}();
@@ -1862,22 +1846,8 @@ public:
return TargetInfo::CharPtrBuiltinVaList;
}
bool setCPU(const std::string &Name) override {
- GPU = llvm::StringSwitch<GPUKind>(Name)
- .Case("sm_20", GK_SM20)
- .Case("sm_21", GK_SM21)
- .Case("sm_30", GK_SM30)
- .Case("sm_32", GK_SM32)
- .Case("sm_35", GK_SM35)
- .Case("sm_37", GK_SM37)
- .Case("sm_50", GK_SM50)
- .Case("sm_52", GK_SM52)
- .Case("sm_53", GK_SM53)
- .Case("sm_60", GK_SM60)
- .Case("sm_61", GK_SM61)
- .Case("sm_62", GK_SM62)
- .Default(GK_NONE);
-
- return GPU != GK_NONE;
+ GPU = StringToCudaArch(Name);
+ return GPU != CudaArch::UNKNOWN;
}
void setSupportedOpenCLOpts() override {
auto &Opts = getSupportedOpenCLOpts();
diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp
index f9e1024f92..425f315ccd 100644
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -51,43 +51,11 @@ void BindArchAction::anchor() {}
BindArchAction::BindArchAction(Action *Input, const char *_ArchName)
: Action(BindArchClass, Input), ArchName(_ArchName) {}
-// Converts CUDA GPU architecture, e.g. "sm_21", to its corresponding virtual
-// compute arch, e.g. "compute_20". Returns null if the input arch is null or
-// doesn't match an existing arch.
-static const char* GpuArchToComputeName(const char *ArchName) {
- if (!ArchName)
- return nullptr;
- return llvm::StringSwitch<const char *>(ArchName)
- .Cases("sm_20", "sm_21", "compute_20")
- .Case("sm_30", "compute_30")
- .Case("sm_32", "compute_32")
- .Case("sm_35", "compute_35")
- .Case("sm_37", "compute_37")
- .Case("sm_50", "compute_50")
- .Case("sm_52", "compute_52")
- .Case("sm_53", "compute_53")
- .Case("sm_60", "compute_60")
- .Case("sm_61", "compute_61")
- .Case("sm_62", "compute_62")
- .Default(nullptr);
-}
-
void CudaDeviceAction::anchor() {}
-CudaDeviceAction::CudaDeviceAction(Action *Input, const char *ArchName,
+CudaDeviceAction::CudaDeviceAction(Action *Input, CudaArch Arch,
bool AtTopLevel)
- : Action(CudaDeviceClass, Input), GpuArchName(ArchName),
- AtTopLevel(AtTopLevel) {
- assert(!GpuArchName || IsValidGpuArchName(GpuArchName));
-}
-
-const char *CudaDeviceAction::getComputeArchName() const {
- return GpuArchToComputeName(GpuArchName);
-}
-
-bool CudaDeviceAction::IsValidGpuArchName(llvm::StringRef ArchName) {
- return GpuArchToComputeName(ArchName.data()) != nullptr;
-}
+ : Action(CudaDeviceClass, Input), GpuArch(Arch), AtTopLevel(AtTopLevel) {}
void CudaHostAction::anchor() {}
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 7235bd0efc..78c3125cdb 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -23,6 +23,7 @@
#include "clang/Driver/ToolChain.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
@@ -1022,9 +1023,10 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
os << '"' << BIA->getArchName() << '"' << ", {"
<< PrintActions1(C, *BIA->input_begin(), Ids) << "}";
} else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
- os << '"'
- << (CDA->getGpuArchName() ? CDA->getGpuArchName() : "(multiple archs)")
- << '"' << ", {" << PrintActions1(C, *CDA->input_begin(), Ids) << "}";
+ CudaArch Arch = CDA->getGpuArch();
+ if (Arch != CudaArch::UNKNOWN)
+ os << "'" << CudaArchToString(Arch) << "', ";
+ os << "{" << PrintActions1(C, *CDA->input_begin(), Ids) << "}";
} else {
const ActionList *AL;
if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
@@ -1380,24 +1382,25 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
return C.MakeAction<CudaHostAction>(HostAction, ActionList());
// Collect all cuda_gpu_arch parameters, removing duplicates.
- SmallVector<const char *, 4> GpuArchList;
- llvm::StringSet<> GpuArchNames;
+ SmallVector<CudaArch, 4> GpuArchList;
+ llvm::SmallSet<CudaArch, 4> GpuArchs;
for (Arg *A : Args) {
if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ))
continue;
A->claim();
- const auto& Arch = A->getValue();
- if (!CudaDeviceAction::IsValidGpuArchName(Arch))
- C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << Arch;
- else if (GpuArchNames.insert(Arch).second)
+ const auto &ArchStr = A->getValue();
+ CudaArch Arch = StringToCudaArch(ArchStr);
+ if (Arch == CudaArch::UNKNOWN)
+ C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ else if (GpuArchs.insert(Arch).second)
GpuArchList.push_back(Arch);
}
// Default to sm_20 which is the lowest common denominator for supported GPUs.
// sm_20 code should work correctly, if suboptimally, on all newer GPUs.
if (GpuArchList.empty())
- GpuArchList.push_back("sm_20");
+ GpuArchList.push_back(CudaArch::SM_20);
// Replicate inputs for each GPU architecture.
Driver::InputList CudaDeviceInputs;
@@ -1463,7 +1466,7 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
}
auto FatbinAction = C.MakeAction<CudaDeviceAction>(
C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN),
- /* GpuArchName = */ nullptr,
+ CudaArch::UNKNOWN,
/* AtTopLevel = */ false);
// Return a new host action that incorporates original host action and all
// device actions.
@@ -2047,8 +2050,8 @@ InputInfo Driver::BuildJobsForActionNoCache(
// Call BuildJobsForAction() again, now with correct device parameters.
InputInfo II = BuildJobsForAction(
C, *CDA->input_begin(), C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
- CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true,
- LinkingOutput, CachedResults);
+ CudaArchToString(CDA->getGpuArch()), CDA->isAtTopLevel(),
+ /*MultipleArchs=*/true, LinkingOutput, CachedResults);
// Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so
// that one can retrieve II's GPU arch.
II.setAction(A);
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 2425619d4c..14ed0d4e3e 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -11222,9 +11222,10 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
auto* A = cast<const CudaDeviceAction>(II.getAction());
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
- const char *Arch = (II.getType() == types::TY_PP_Asm)
- ? A->getComputeArchName()
- : A->getGpuArchName();
+ const char *Arch =
+ (II.getType() == types::TY_PP_Asm)
+ ? CudaVirtualArchToString(VirtualArchForCudaArch(A->getGpuArch()))
+ : CudaArchToString(A->getGpuArch());
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
Arch + ",file=" + II.getFilename()));
}