summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Iliin <Pavel.Iliin@arm.com>2024-05-03 18:07:17 +0100
committerGitHub <noreply@github.com>2024-05-03 18:07:17 +0100
commit804202292b7601feee5c091a3a6df6124f4d61e1 (patch)
tree24772fc3b853bd8470cd3521d1ffe96587a90ef1
parent8a0073ad4658033b6a4f6bae4fbaf924ac813bc6 (diff)
[FMV][AArch64] Don't optimize backward compatible features in resolver. (#90928)
For arch64 features, such as Branch Target Identification or MTE (Memory Tagging Extension), compatible with targets that lack their support we may encounter scenarios where a binary compiled with MTE for example is executed on both MTE and non-MTE hardware and we still need to detect at runtime whether the MTE feature is available to choose the appropriate function version. So, we cannot optimize the function multi versioning resolver by removing checks for these features enabled for the target during compilation.
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp10
-rw-r--r--clang/test/CodeGen/attr-target-clones-aarch64.c29
2 files changed, 35 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 546beae4af59..477814140a9e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2761,8 +2761,14 @@ llvm::Value *CodeGenFunction::FormAArch64ResolverCondition(
const MultiVersionResolverOption &RO) {
llvm::SmallVector<StringRef, 8> CondFeatures;
for (const StringRef &Feature : RO.Conditions.Features) {
- // Form condition for features which are not yet enabled in target
- if (!getContext().getTargetInfo().hasFeature(Feature))
+ // Optimize the Function Multi Versioning resolver by creating conditions
+ // only for features that are not enabled in the target. The exception is
+ // for features whose extension instructions are executed as NOP on targets
+ // without extension support.
+ if (!getContext().getTargetInfo().hasFeature(Feature) ||
+ Feature.equals("bti") || Feature.equals("memtag") ||
+ Feature.equals("memtag2") || Feature.equals("memtag3") ||
+ Feature.equals("dgh"))
CondFeatures.push_back(Feature);
}
if (!CondFeatures.empty()) {
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index f75d8a69ebf0..603d067864b4 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -526,8 +526,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: resolver_entry:
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096
-// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
+// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
+// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK-MTE-BTI: resolver_return:
@@ -604,7 +604,24 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
//
// CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver(
// CHECK-MTE-BTI-NEXT: resolver_entry:
+// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
+// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624
+// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624
+// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
+// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
+// CHECK-MTE-BTI: resolver_return:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mbti
+// CHECK-MTE-BTI: resolver_else:
+// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17592186044416
+// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17592186044416
+// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK-MTE-BTI: resolver_return1:
+// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag2
+// CHECK-MTE-BTI: resolver_else2:
+// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3.default
//
//
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone
@@ -712,7 +729,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI: resolver_return:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve
// CHECK-MTE-BTI: resolver_else:
+// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
+// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
+// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
+// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
+// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
+// CHECK-MTE-BTI: resolver_return1:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti
+// CHECK-MTE-BTI: resolver_else2:
+// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default
//
//
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone