Skip to content

Commit 677da09

Browse files
committed
AArch64: add support for newer Apple CPUs
They're roughly ARMv8.6. This works in the .td file, but in AArch64TargetParser.def, marking them v8.6 brings in support for the SM4 cryptographic extension, and we don't actually have that. So on the TargetParser side they're marked as v8.5, with the extra features (BF16 and I8MM) added manually. Finally, A16 supports the HCX extension in addition to v8.6. This has no TargetParser implications.
1 parent e030be6 commit 677da09

File tree

6 files changed

+90
-5
lines changed

6 files changed

+90
-5
lines changed

‎clang/test/Misc/target-invalid-cpu-note.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
77
// AARCH64: error: unknown target CPU 'not-a-cpu'
8-
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
8+
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
99

1010
// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
1111
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
12-
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
12+
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
1313

1414
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
1515
// X86: error: unknown target CPU 'not-a-cpu'

‎llvm/include/llvm/Support/AArch64TargetParser.def

+9
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,17 @@ AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
253253
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
254254
AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
255255
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
256+
AARCH64_CPU_NAME("apple-a15", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
257+
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
258+
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
259+
AARCH64_CPU_NAME("apple-a16", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
260+
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
261+
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
256262
AARCH64_CPU_NAME("apple-m1", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
257263
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
264+
AARCH64_CPU_NAME("apple-m2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
265+
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
266+
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
258267
AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
259268
(AArch64::AEK_FP16))
260269
AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,

‎llvm/lib/Target/AArch64/AArch64.td

+47-2
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,38 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
857857
FeatureZCRegMove,
858858
FeatureZCZeroing]>;
859859

860+
def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
861+
"Apple A15", [
862+
FeatureAlternateSExtLoadCVTF32Pattern,
863+
FeatureArithmeticBccFusion,
864+
FeatureArithmeticCbzFusion,
865+
FeatureDisableLatencySchedHeuristic,
866+
FeatureFuseAddress,
867+
FeatureFuseAES,
868+
FeatureFuseArithmeticLogic,
869+
FeatureFuseCCSelect,
870+
FeatureFuseCryptoEOR,
871+
FeatureFuseLiterals,
872+
FeatureZCRegMove,
873+
FeatureZCZeroing
874+
]>;
875+
876+
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
877+
"Apple A16", [
878+
FeatureAlternateSExtLoadCVTF32Pattern,
879+
FeatureArithmeticBccFusion,
880+
FeatureArithmeticCbzFusion,
881+
FeatureDisableLatencySchedHeuristic,
882+
FeatureFuseAddress,
883+
FeatureFuseAES,
884+
FeatureFuseArithmeticLogic,
885+
FeatureFuseCCSelect,
886+
FeatureFuseCryptoEOR,
887+
FeatureFuseLiterals,
888+
FeatureZCRegMove,
889+
FeatureZCZeroing
890+
]>;
891+
860892
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
861893
"Samsung Exynos-M3 processors",
862894
[FeatureExynosCheapAsMoveHandling,
@@ -1072,6 +1104,13 @@ def ProcessorFeatures {
10721104
FeaturePredRes, FeatureCacheDeepPersist,
10731105
FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
10741106
FeatureAltFPCmp];
1107+
list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
1108+
FeatureNEON, FeaturePerfMon, FeatureSHA3,
1109+
FeatureFullFP16, FeatureFP16FML];
1110+
list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
1111+
FeatureNEON, FeaturePerfMon, FeatureSHA3,
1112+
FeatureFullFP16, FeatureFP16FML,
1113+
FeatureHCX];
10751114
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
10761115
FeaturePerfMon];
10771116
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1229,10 +1268,16 @@ def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
12291268
[TuneAppleA13]>;
12301269
def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
12311270
[TuneAppleA14]>;
1271+
def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,
1272+
[TuneAppleA15]>;
1273+
def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
1274+
[TuneAppleA16]>;
12321275

12331276
// Mac CPUs
12341277
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
12351278
[TuneAppleA14]>;
1279+
def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,
1280+
[TuneAppleA15]>;
12361281

12371282
// watch CPUs.
12381283
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
@@ -1241,8 +1286,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
12411286
[TuneAppleA12]>;
12421287

12431288
// Alias for the latest Apple processor model supported by LLVM.
1244-
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
1245-
[TuneAppleA14]>;
1289+
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
1290+
[TuneAppleA16]>;
12461291

12471292
// Fujitsu A64FX
12481293
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,

‎llvm/lib/Target/AArch64/AArch64Subtarget.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ void AArch64Subtarget::initializeProperties() {
160160
case AppleA12:
161161
case AppleA13:
162162
case AppleA14:
163+
case AppleA15:
164+
case AppleA16:
163165
CacheLineSize = 64;
164166
PrefetchDistance = 280;
165167
MinPrefetchStride = 2048;

‎llvm/lib/Target/AArch64/AArch64Subtarget.h

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
4747
AppleA12,
4848
AppleA13,
4949
AppleA14,
50+
AppleA15,
51+
AppleA16,
5052
Carmel,
5153
CortexA35,
5254
CortexA53,

‎llvm/unittests/Support/TargetParserTest.cpp

+28-1
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,24 @@ INSTANTIATE_TEST_SUITE_P(
11121112
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
11131113
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
11141114
"8.5-A"),
1115+
ARMCPUTestParams("apple-a15", "armv8.5-a", "crypto-neon-fp-armv8",
1116+
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
1117+
AArch64::AEK_FP | AArch64::AEK_SIMD |
1118+
AArch64::AEK_LSE | AArch64::AEK_RAS |
1119+
AArch64::AEK_RDM | AArch64::AEK_RCPC |
1120+
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
1121+
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
1122+
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
1123+
"8.5-A"),
1124+
ARMCPUTestParams("apple-a16", "armv8.5-a", "crypto-neon-fp-armv8",
1125+
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
1126+
AArch64::AEK_FP | AArch64::AEK_SIMD |
1127+
AArch64::AEK_LSE | AArch64::AEK_RAS |
1128+
AArch64::AEK_RDM | AArch64::AEK_RCPC |
1129+
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
1130+
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
1131+
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
1132+
"8.5-A"),
11151133
ARMCPUTestParams("apple-m1", "armv8.5-a", "crypto-neon-fp-armv8",
11161134
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
11171135
AArch64::AEK_FP | AArch64::AEK_SIMD |
@@ -1120,6 +1138,15 @@ INSTANTIATE_TEST_SUITE_P(
11201138
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
11211139
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
11221140
"8.5-A"),
1141+
ARMCPUTestParams("apple-m2", "armv8.5-a", "crypto-neon-fp-armv8",
1142+
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
1143+
AArch64::AEK_FP | AArch64::AEK_SIMD |
1144+
AArch64::AEK_LSE | AArch64::AEK_RAS |
1145+
AArch64::AEK_RDM | AArch64::AEK_RCPC |
1146+
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
1147+
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
1148+
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
1149+
"8.5-A"),
11231150
ARMCPUTestParams("apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
11241151
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
11251152
AArch64::AEK_FP | AArch64::AEK_SIMD |
@@ -1257,7 +1284,7 @@ INSTANTIATE_TEST_SUITE_P(
12571284
AArch64::AEK_LSE | AArch64::AEK_RDM,
12581285
"8.2-A")));
12591286

1260-
static constexpr unsigned NumAArch64CPUArchs = 54;
1287+
static constexpr unsigned NumAArch64CPUArchs = 57;
12611288

12621289
TEST(TargetParserTest, testAArch64CPUArchList) {
12631290
SmallVector<StringRef, NumAArch64CPUArchs> List;

0 commit comments

Comments
 (0)