Skip to content

Commit 95c8750

Browse files
authored
[AArch64][GlobalISel] Added pmull/pmull64 intrinsic support (llvm#165740)
GlobalISel (GISel) no longer falls back to SelectionDAG (SDAG) when lowering the pmull and pmull64 intrinsics.
1 parent d4e3a23 commit 95c8750

File tree

7 files changed

+219
-118
lines changed

7 files changed

+219
-118
lines changed

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,12 @@ def G_SMULL : AArch64GenericInstruction {
197197
let hasSideEffects = 0;
198198
}
199199

200+
// G_PMULL: AArch64 generic instruction with a single result ($dst, type0) and
// two source operands ($src1, $src2) that share one type (type1); the source
// type is constrained independently of the result type. Declared as having no
// side effects.
def G_PMULL : AArch64GenericInstruction {
201+
let OutOperandList = (outs type0:$dst);
202+
let InOperandList = (ins type1:$src1, type1:$src2);
203+
let hasSideEffects = 0;
204+
}
205+
200206
def G_UADDLP : AArch64GenericInstruction {
201207
let OutOperandList = (outs type0:$dst);
202208
let InOperandList = (ins type0:$src1);
@@ -273,6 +279,7 @@ def : GINodeEquiv<G_FCMGT, AArch64fcmgt>;
273279

274280
def : GINodeEquiv<G_BSP, AArch64bsp>;
275281

282+
def : GINodeEquiv<G_PMULL, AArch64pmull>;
276283
def : GINodeEquiv<G_UMULL, AArch64umull>;
277284
def : GINodeEquiv<G_SMULL, AArch64smull>;
278285

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,6 +1809,9 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
18091809
return LowerBinOp(TargetOpcode::G_FMAXNUM);
18101810
case Intrinsic::aarch64_neon_fminnm:
18111811
return LowerBinOp(TargetOpcode::G_FMINNUM);
1812+
case Intrinsic::aarch64_neon_pmull:
1813+
case Intrinsic::aarch64_neon_pmull64:
1814+
return LowerBinOp(AArch64::G_PMULL);
18121815
case Intrinsic::aarch64_neon_smull:
18131816
return LowerBinOp(AArch64::G_SMULL);
18141817
case Intrinsic::aarch64_neon_umull:

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
560560
case TargetOpcode::G_FCMP:
561561
case TargetOpcode::G_LROUND:
562562
case TargetOpcode::G_LLROUND:
563+
case AArch64::G_PMULL:
563564
return true;
564565
case TargetOpcode::G_INTRINSIC:
565566
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {

llvm/test/CodeGen/AArch64/aarch64-smull.ll

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
4-
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
5-
6-
; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
4+
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
85

96
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
107
; CHECK-LABEL: smull_v8i8_v8i16:
@@ -1832,14 +1829,33 @@ entry:
18321829
}
18331830

18341831
define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
1835-
; CHECK-LABEL: pmlsl2_v8i16_uzp1:
1836-
; CHECK: // %bb.0:
1837-
; CHECK-NEXT: ldr q2, [x1, #16]
1838-
; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1839-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1840-
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
1841-
; CHECK-NEXT: str q0, [x0]
1842-
; CHECK-NEXT: ret
1832+
; CHECK-NEON-LABEL: pmlsl2_v8i16_uzp1:
1833+
; CHECK-NEON: // %bb.0:
1834+
; CHECK-NEON-NEXT: ldr q2, [x1, #16]
1835+
; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1836+
; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1837+
; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
1838+
; CHECK-NEON-NEXT: str q0, [x0]
1839+
; CHECK-NEON-NEXT: ret
1840+
;
1841+
; CHECK-SVE-LABEL: pmlsl2_v8i16_uzp1:
1842+
; CHECK-SVE: // %bb.0:
1843+
; CHECK-SVE-NEXT: ldr q2, [x1, #16]
1844+
; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b
1845+
; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
1846+
; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
1847+
; CHECK-SVE-NEXT: str q0, [x0]
1848+
; CHECK-SVE-NEXT: ret
1849+
;
1850+
; CHECK-GI-LABEL: pmlsl2_v8i16_uzp1:
1851+
; CHECK-GI: // %bb.0:
1852+
; CHECK-GI-NEXT: ldr q2, [x1, #16]
1853+
; CHECK-GI-NEXT: mov d0, v0.d[1]
1854+
; CHECK-GI-NEXT: xtn v2.8b, v2.8h
1855+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
1856+
; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
1857+
; CHECK-GI-NEXT: str q0, [x0]
1858+
; CHECK-GI-NEXT: ret
18431859
%5 = getelementptr inbounds i32, ptr %3, i64 4
18441860
%6 = load <8 x i16>, ptr %5, align 4
18451861
%7 = trunc <8 x i16> %6 to <8 x i8>
@@ -1991,16 +2007,40 @@ define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
19912007
}
19922008

19932009
define void @pmlsl_pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
1994-
; CHECK-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
1995-
; CHECK: // %bb.0: // %entry
1996-
; CHECK-NEXT: ldp q2, q3, [x1]
1997-
; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b
1998-
; CHECK-NEXT: pmull v3.8h, v0.8b, v2.8b
1999-
; CHECK-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2000-
; CHECK-NEXT: add v0.8h, v3.8h, v0.8h
2001-
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
2002-
; CHECK-NEXT: str q0, [x0]
2003-
; CHECK-NEXT: ret
2010+
; CHECK-NEON-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2011+
; CHECK-NEON: // %bb.0: // %entry
2012+
; CHECK-NEON-NEXT: ldp q2, q3, [x1]
2013+
; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b
2014+
; CHECK-NEON-NEXT: pmull v3.8h, v0.8b, v2.8b
2015+
; CHECK-NEON-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2016+
; CHECK-NEON-NEXT: add v0.8h, v3.8h, v0.8h
2017+
; CHECK-NEON-NEXT: sub v0.8h, v1.8h, v0.8h
2018+
; CHECK-NEON-NEXT: str q0, [x0]
2019+
; CHECK-NEON-NEXT: ret
2020+
;
2021+
; CHECK-SVE-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2022+
; CHECK-SVE: // %bb.0: // %entry
2023+
; CHECK-SVE-NEXT: ldp q2, q3, [x1]
2024+
; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b
2025+
; CHECK-SVE-NEXT: pmull v3.8h, v0.8b, v2.8b
2026+
; CHECK-SVE-NEXT: pmull2 v0.8h, v0.16b, v2.16b
2027+
; CHECK-SVE-NEXT: add v0.8h, v3.8h, v0.8h
2028+
; CHECK-SVE-NEXT: sub v0.8h, v1.8h, v0.8h
2029+
; CHECK-SVE-NEXT: str q0, [x0]
2030+
; CHECK-SVE-NEXT: ret
2031+
;
2032+
; CHECK-GI-LABEL: pmlsl_pmlsl2_v8i16_uzp1:
2033+
; CHECK-GI: // %bb.0: // %entry
2034+
; CHECK-GI-NEXT: ldp q2, q3, [x1]
2035+
; CHECK-GI-NEXT: mov d4, v0.d[1]
2036+
; CHECK-GI-NEXT: xtn v2.8b, v2.8h
2037+
; CHECK-GI-NEXT: xtn v3.8b, v3.8h
2038+
; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v2.8b
2039+
; CHECK-GI-NEXT: pmull v2.8h, v4.8b, v3.8b
2040+
; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
2041+
; CHECK-GI-NEXT: sub v0.8h, v1.8h, v0.8h
2042+
; CHECK-GI-NEXT: str q0, [x0]
2043+
; CHECK-GI-NEXT: ret
20042044
entry:
20052045
%5 = load <8 x i16>, ptr %3, align 4
20062046
%6 = trunc <8 x i16> %5 to <8 x i8>

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
94

105
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
116
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
@@ -2721,27 +2716,47 @@ entry:
27212716
}
27222717

27232718
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
2724-
; CHECK-LABEL: test_vmull_p64:
2725-
; CHECK: // %bb.0: // %entry
2726-
; CHECK-NEXT: fmov d0, x1
2727-
; CHECK-NEXT: fmov d1, x0
2728-
; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
2729-
; CHECK-NEXT: mov x1, v0.d[1]
2730-
; CHECK-NEXT: fmov x0, d0
2731-
; CHECK-NEXT: ret
2719+
; CHECK-SD-LABEL: test_vmull_p64:
2720+
; CHECK-SD: // %bb.0: // %entry
2721+
; CHECK-SD-NEXT: fmov d0, x1
2722+
; CHECK-SD-NEXT: fmov d1, x0
2723+
; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
2724+
; CHECK-SD-NEXT: mov x1, v0.d[1]
2725+
; CHECK-SD-NEXT: fmov x0, d0
2726+
; CHECK-SD-NEXT: ret
2727+
;
2728+
; CHECK-GI-LABEL: test_vmull_p64:
2729+
; CHECK-GI: // %bb.0: // %entry
2730+
; CHECK-GI-NEXT: fmov d0, x0
2731+
; CHECK-GI-NEXT: fmov d1, x1
2732+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
2733+
; CHECK-GI-NEXT: mov d1, v0.d[1]
2734+
; CHECK-GI-NEXT: fmov x0, d0
2735+
; CHECK-GI-NEXT: fmov x1, d1
2736+
; CHECK-GI-NEXT: ret
27322737
entry:
27332738
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b)
27342739
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
27352740
ret i128 %vmull3.i
27362741
}
27372742

27382743
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
2739-
; CHECK-LABEL: test_vmull_high_p64:
2740-
; CHECK: // %bb.0: // %entry
2741-
; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
2742-
; CHECK-NEXT: mov x1, v0.d[1]
2743-
; CHECK-NEXT: fmov x0, d0
2744-
; CHECK-NEXT: ret
2744+
; CHECK-SD-LABEL: test_vmull_high_p64:
2745+
; CHECK-SD: // %bb.0: // %entry
2746+
; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
2747+
; CHECK-SD-NEXT: mov x1, v0.d[1]
2748+
; CHECK-SD-NEXT: fmov x0, d0
2749+
; CHECK-SD-NEXT: ret
2750+
;
2751+
; CHECK-GI-LABEL: test_vmull_high_p64:
2752+
; CHECK-GI: // %bb.0: // %entry
2753+
; CHECK-GI-NEXT: mov d0, v0.d[1]
2754+
; CHECK-GI-NEXT: mov d1, v1.d[1]
2755+
; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
2756+
; CHECK-GI-NEXT: mov d1, v0.d[1]
2757+
; CHECK-GI-NEXT: fmov x0, d0
2758+
; CHECK-GI-NEXT: fmov x1, d1
2759+
; CHECK-GI-NEXT: ret
27452760
entry:
27462761
%0 = extractelement <2 x i64> %a, i32 1
27472762
%1 = extractelement <2 x i64> %b, i32 1

0 commit comments

Comments
 (0)