diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64.td')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64.td | 53 |
1 files changed, 24 insertions, 29 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 6425aa9b091f..3af427d526f8 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -391,9 +391,18 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "equivalent when the immediate does " "not fit in the encoding.">; -def FeatureAddrLSLFast : SubtargetFeature< - "addr-lsl-fast", "HasAddrLSLFast", "true", - "Address operands with logical shift of up to 3 places are cheap">; +// Address operands with shift amount 2 or 3 are fast on all Arm chips except +// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57 +// and derived designs through Cortex-X1 take an extra micro-op for shifts +// of 1 or 4. Other Arm chips handle all shifted operands at the same speed +// as unshifted operands. +// +// We don't try to model the behavior of the old Apple cores because new code +// targeting A7 is very unlikely to actually run on an A7. The Cortex cores +// are modeled by FeatureAddrLSLSlow14. +def FeatureAddrLSLSlow14 : SubtargetFeature< + "addr-lsl-slow-14", "HasAddrLSLSlow14", "true", + "Address operands with shift amount of 1 or 4 are slow">; def FeatureALULSLFast : SubtargetFeature< "alu-lsl-fast", "HasALULSLFast", "true", @@ -885,6 +894,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureBalanceFPOps, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -903,6 +913,7 @@ def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureFuseAES, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -910,6 +921,7 @@ def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -917,6 +929,7 @@ def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", "Cortex-A75 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -924,7 +937,7 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -934,7 +947,7 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -944,7 +957,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -956,7 +969,7 @@ def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -968,7 +981,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -979,7 +992,6 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -990,7 +1002,6 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1001,7 +1012,6 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1012,7 +1022,6 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1028,7 +1037,7 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1039,7 +1048,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1047,7 +1055,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", "Cortex-X3 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1057,7 +1064,6 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", "Cortex-X4 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1215,7 +1221,6 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -1234,7 +1239,6 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureZCZeroing]>; @@ -1244,7 +1248,6 @@ def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureStorePairSuppress]>; @@ -1254,7 +1257,6 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureSlowSTRQro]>; @@ -1268,7 +1270,7 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1 "Neoverse N1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1278,7 +1280,6 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2 "Neoverse N2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1288,7 +1289,6 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1298,7 +1298,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 "Neoverse V1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1309,7 +1309,6 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2 "Neoverse V2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1321,7 +1320,6 @@ def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast]>; def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", @@ -1381,7 +1379,6 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1397,7 +1394,6 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1414,7 +1410,6 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, |