summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AArch64/AArch64.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64.td')
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td53
1 files changed, 24 insertions, 29 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 6425aa9b091f..3af427d526f8 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -391,9 +391,18 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"equivalent when the immediate does "
"not fit in the encoding.">;
-def FeatureAddrLSLFast : SubtargetFeature<
- "addr-lsl-fast", "HasAddrLSLFast", "true",
- "Address operands with logical shift of up to 3 places are cheap">;
+// Address operands with shift amount 2 or 3 are fast on all Arm chips except
+// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57
+// and derived designs through Cortex-X1 take an extra micro-op for shifts
+// of 1 or 4. Other Arm chips handle all shifted operands at the same speed
+// as unshifted operands.
+//
+// We don't try to model the behavior of the old Apple cores because new code
+// targeting A7 is very unlikely to actually run on an A7. The Cortex cores
+// are modeled by FeatureAddrLSLSlow14.
+def FeatureAddrLSLSlow14 : SubtargetFeature<
+ "addr-lsl-slow-14", "HasAddrLSLSlow14", "true",
+ "Address operands with shift amount of 1 or 4 are slow">;
def FeatureALULSLFast : SubtargetFeature<
"alu-lsl-fast", "HasALULSLFast", "true",
@@ -885,6 +894,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureBalanceFPOps,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
+ FeatureAddrLSLSlow14,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -903,6 +913,7 @@ def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
+ FeatureAddrLSLSlow14,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -910,6 +921,7 @@ def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
+ FeatureAddrLSLSlow14,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -917,6 +929,7 @@ def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
+ FeatureAddrLSLSlow14,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -924,7 +937,7 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -934,7 +947,7 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -944,7 +957,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -956,7 +969,7 @@ def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -968,7 +981,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -979,7 +992,6 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -990,7 +1002,6 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureFuseAES,
FeaturePostRAScheduler,
FeatureCmpBccFusion,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureEnableSelectOptimize,
@@ -1001,7 +1012,6 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
FeatureFuseAES,
FeaturePostRAScheduler,
FeatureCmpBccFusion,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureEnableSelectOptimize,
@@ -1012,7 +1022,6 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720",
FeatureFuseAES,
FeaturePostRAScheduler,
FeatureCmpBccFusion,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureEnableSelectOptimize,
@@ -1028,7 +1037,7 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1039,7 +1048,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1047,7 +1055,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
"Cortex-X3 ARM processors", [
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureFuseAES,
@@ -1057,7 +1064,6 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
"Cortex-X4 ARM processors", [
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureFuseAES,
@@ -1215,7 +1221,6 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
@@ -1234,7 +1239,6 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeatureStorePairSuppress,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureZCZeroing]>;
@@ -1244,7 +1248,6 @@ def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureStorePairSuppress]>;
@@ -1254,7 +1257,6 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
FeatureStorePairSuppress,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureSlowSTRQro]>;
@@ -1268,7 +1270,7 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1
"Neoverse N1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1278,7 +1280,6 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
"Neoverse N2 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1288,7 +1289,6 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne
"Neoverse 512-TVB ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1298,7 +1298,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
"Neoverse V1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
+ FeatureAddrLSLSlow14,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1309,7 +1309,6 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
"Neoverse V2 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
@@ -1321,7 +1320,6 @@ def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
FeatureStorePairSuppress,
- FeatureAddrLSLFast,
FeatureALULSLFast]>;
def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
@@ -1381,7 +1379,6 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
FeaturePostRAScheduler,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
@@ -1397,7 +1394,6 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
FeaturePostRAScheduler,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
@@ -1414,7 +1410,6 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B",
FeaturePostRAScheduler,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureAddrLSLFast,
FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,