diff options
author | Pavel Iliin <Pavel.Iliin@arm.com> | 2024-05-03 18:07:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-03 18:07:17 +0100 |
commit | 804202292b7601feee5c091a3a6df6124f4d61e1 (patch) | |
tree | 24772fc3b853bd8470cd3521d1ffe96587a90ef1 | |
parent | 8a0073ad4658033b6a4f6bae4fbaf924ac813bc6 (diff) |
[FMV][AArch64] Don't optimize backward compatible features in resolver. (#90928)
For AArch64 features such as Branch Target Identification (BTI) or MTE
(Memory Tagging Extension), which are backward compatible with targets
that lack support for them, a binary compiled with MTE, for example, may
be executed on both MTE and non-MTE hardware. In that case we still need
to detect at runtime whether the MTE feature is available in order to
choose the appropriate function version.
So we cannot optimize the function multiversioning resolver by removing
the runtime checks for these features, even when they are enabled for
the target at compile time.
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.cpp | 10 | ||||
-rw-r--r-- | clang/test/CodeGen/attr-target-clones-aarch64.c | 29 |
2 files changed, 35 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 546beae4af59..477814140a9e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2761,8 +2761,14 @@ llvm::Value *CodeGenFunction::FormAArch64ResolverCondition( const MultiVersionResolverOption &RO) { llvm::SmallVector<StringRef, 8> CondFeatures; for (const StringRef &Feature : RO.Conditions.Features) { - // Form condition for features which are not yet enabled in target - if (!getContext().getTargetInfo().hasFeature(Feature)) + // Optimize the Function Multi Versioning resolver by creating conditions + // only for features that are not enabled in the target. The exception is + // for features whose extension instructions are executed as NOP on targets + // without extension support. + if (!getContext().getTargetInfo().hasFeature(Feature) || + Feature.equals("bti") || Feature.equals("memtag") || + Feature.equals("memtag2") || Feature.equals("memtag3") || + Feature.equals("dgh")) CondFeatures.push_back(Feature); } if (!CondFeatures.empty()) { diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c index f75d8a69ebf0..603d067864b4 100644 --- a/clang/test/CodeGen/attr-target-clones-aarch64.c +++ b/clang/test/CodeGen/attr-target-clones-aarch64.c @@ -526,8 +526,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI-NEXT: resolver_entry: // CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() // CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096 -// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512 // CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] // 
CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] // CHECK-MTE-BTI: resolver_return: @@ -604,7 +604,24 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // // CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver( // CHECK-MTE-BTI-NEXT: resolver_entry: +// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver() +// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]] +// CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mbti +// CHECK-MTE-BTI: resolver_else: +// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17592186044416 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17592186044416 +// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK-MTE-BTI: resolver_return1: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag2 +// CHECK-MTE-BTI: resolver_else2: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3.default // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone @@ -712,7 +729,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default")) // CHECK-MTE-BTI: resolver_return: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve // CHECK-MTE-BTI: resolver_else: +// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 +// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624 +// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624 +// 
CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]] +// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]] +// CHECK-MTE-BTI: resolver_return1: // CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti +// CHECK-MTE-BTI: resolver_else2: +// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default // // // CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone |