Skip to content

Commit 556dd01

Browse files
authored
[RISCV] Expand multiplication by (2/4/8 * 3/5/9 + 1) << N with SHL_ADD (#166372)
1 parent 868f23f commit 556dd01

File tree

3 files changed

+252
-57
lines changed

3 files changed

+252
-57
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 36 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
1649616496
}
1649716497

1649816498
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
16499-
unsigned ShY) {
16499+
unsigned ShY, bool AddX) {
1650016500
SDLoc DL(N);
1650116501
EVT VT = N->getValueType(0);
1650216502
SDValue X = N->getOperand(0);
1650316503
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
1650416504
DAG.getTargetConstant(ShY, DL, VT), X);
1650516505
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16506-
DAG.getTargetConstant(ShX, DL, VT), Mul359);
16506+
DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
1650716507
}
1650816508

1650916509
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
1651016510
uint64_t MulAmt) {
16511+
// 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
1651116512
switch (MulAmt) {
1651216513
case 5 * 3:
16513-
return getShlAddShlAdd(N, DAG, 2, 1);
16514+
return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
1651416515
case 9 * 3:
16515-
return getShlAddShlAdd(N, DAG, 3, 1);
16516+
return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
1651616517
case 5 * 5:
16517-
return getShlAddShlAdd(N, DAG, 2, 2);
16518+
return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
1651816519
case 9 * 5:
16519-
return getShlAddShlAdd(N, DAG, 3, 2);
16520+
return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
1652016521
case 9 * 9:
16521-
return getShlAddShlAdd(N, DAG, 3, 3);
16522+
return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
1652216523
default:
16523-
return SDValue();
16524+
break;
1652416525
}
16526+
16527+
// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
16528+
int ShX;
16529+
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
16530+
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
16531+
if (ShX <= 3)
16532+
return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
16533+
}
16534+
return SDValue();
1652516535
}
1652616536

1652716537
// Try to expand a scalar multiply to a faster sequence.
@@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1658116591
DAG.getConstant(Shift, DL, VT));
1658216592
}
1658316593

16584-
// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
16585-
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
16586-
return V;
16594+
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16595+
// of 25 which happen to be quite common.
16596+
// (2/4/8 * 3/5/9 + 1) * 2^N
16597+
Shift = llvm::countr_zero(MulAmt);
16598+
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
16599+
if (Shift == 0)
16600+
return V;
16601+
SDLoc DL(N);
16602+
return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
16603+
}
1658716604

1658816605
// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
1658916606
// shXadd. First check if this is a sum of two powers of 2 because that's
1659016607
// easy. Then count how many zeros there are up to the first set bit.
16591-
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16592-
unsigned ScaleShift = llvm::countr_zero(MulAmt);
16593-
if (ScaleShift >= 1 && ScaleShift < 4) {
16594-
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
16595-
SDLoc DL(N);
16596-
SDValue Shift1 =
16597-
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16598-
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16599-
DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
16600-
}
16608+
if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16609+
unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
16610+
SDLoc DL(N);
16611+
SDValue Shift1 =
16612+
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16613+
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16614+
DAG.getTargetConstant(Shift, DL, VT), Shift1);
1660116615
}
1660216616

16603-
// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
16604-
// This is the two instruction form, there are also three instruction
16605-
// variants we could implement. e.g.
16606-
// (2^(1,2,3) * 3,5,9 + 1) << C2
16607-
// 2^(C1>3) * 3,5,9 +/- 1
16608-
if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
16609-
assert(Shift != 0 && "MulAmt=4,6,10 handled before");
16610-
if (Shift <= 3) {
16611-
SDLoc DL(N);
16612-
SDValue Mul359 =
16613-
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16614-
DAG.getTargetConstant(ShXAmount, DL, VT), X);
16615-
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
16616-
DAG.getTargetConstant(Shift, DL, VT), X);
16617-
}
16618-
}
16617+
// TODO: 2^(C1>3) * 3,5,9 +/- 1
1661916618

1662016619
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
1662116620
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1664716646
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
1664816647
}
1664916648
}
16650-
16651-
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16652-
// of 25 which happen to be quite common.
16653-
Shift = llvm::countr_zero(MulAmt);
16654-
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
16655-
SDLoc DL(N);
16656-
return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
16657-
}
1665816649
}
1665916650

1666016651
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))

llvm/test/CodeGen/RISCV/rv64xtheadba.ll

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
205205
}
206206

207207
define i64 @addmul22(i64 %a, i64 %b) {
208-
; CHECK-LABEL: addmul22:
209-
; CHECK: # %bb.0:
210-
; CHECK-NEXT: li a2, 22
211-
; CHECK-NEXT: mul a0, a0, a2
212-
; CHECK-NEXT: add a0, a0, a1
213-
; CHECK-NEXT: ret
208+
; RV64I-LABEL: addmul22:
209+
; RV64I: # %bb.0:
210+
; RV64I-NEXT: li a2, 22
211+
; RV64I-NEXT: mul a0, a0, a2
212+
; RV64I-NEXT: add a0, a0, a1
213+
; RV64I-NEXT: ret
214+
;
215+
; RV64XTHEADBA-LABEL: addmul22:
216+
; RV64XTHEADBA: # %bb.0:
217+
; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2
218+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1
219+
; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1
220+
; RV64XTHEADBA-NEXT: ret
214221
%c = mul i64 %a, 22
215222
%d = add i64 %c, %b
216223
ret i64 %d

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 203 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) {
585585
ret i64 %d
586586
}
587587

588+
define i64 @addmul14(i64 %a, i64 %b) {
589+
; RV64I-LABEL: addmul14:
590+
; RV64I: # %bb.0:
591+
; RV64I-NEXT: slli a2, a0, 1
592+
; RV64I-NEXT: slli a0, a0, 4
593+
; RV64I-NEXT: sub a0, a0, a2
594+
; RV64I-NEXT: add a0, a0, a1
595+
; RV64I-NEXT: ret
596+
;
597+
; RV64ZBA-LABEL: addmul14:
598+
; RV64ZBA: # %bb.0:
599+
; RV64ZBA-NEXT: sh1add a2, a0, a0
600+
; RV64ZBA-NEXT: sh1add a0, a2, a0
601+
; RV64ZBA-NEXT: sh1add a0, a0, a1
602+
; RV64ZBA-NEXT: ret
603+
;
604+
; RV64XANDESPERF-LABEL: addmul14:
605+
; RV64XANDESPERF: # %bb.0:
606+
; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
607+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
608+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
609+
; RV64XANDESPERF-NEXT: ret
610+
%c = mul i64 %a, 14
611+
%d = add i64 %c, %b
612+
ret i64 %d
613+
}
614+
588615
define i64 @addmul18(i64 %a, i64 %b) {
589616
; RV64I-LABEL: addmul18:
590617
; RV64I: # %bb.0:
@@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
636663
}
637664

638665
define i64 @addmul22(i64 %a, i64 %b) {
639-
; CHECK-LABEL: addmul22:
640-
; CHECK: # %bb.0:
641-
; CHECK-NEXT: li a2, 22
642-
; CHECK-NEXT: mul a0, a0, a2
643-
; CHECK-NEXT: add a0, a0, a1
644-
; CHECK-NEXT: ret
666+
; RV64I-LABEL: addmul22:
667+
; RV64I: # %bb.0:
668+
; RV64I-NEXT: li a2, 22
669+
; RV64I-NEXT: mul a0, a0, a2
670+
; RV64I-NEXT: add a0, a0, a1
671+
; RV64I-NEXT: ret
672+
;
673+
; RV64ZBA-LABEL: addmul22:
674+
; RV64ZBA: # %bb.0:
675+
; RV64ZBA-NEXT: sh2add a2, a0, a0
676+
; RV64ZBA-NEXT: sh1add a0, a2, a0
677+
; RV64ZBA-NEXT: sh1add a0, a0, a1
678+
; RV64ZBA-NEXT: ret
679+
;
680+
; RV64XANDESPERF-LABEL: addmul22:
681+
; RV64XANDESPERF: # %bb.0:
682+
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
683+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
684+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
685+
; RV64XANDESPERF-NEXT: ret
645686
%c = mul i64 %a, 22
646687
%d = add i64 %c, %b
647688
ret i64 %d
@@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) {
672713
ret i64 %d
673714
}
674715

716+
define i64 @addmul26(i64 %a, i64 %b) {
717+
; RV64I-LABEL: addmul26:
718+
; RV64I: # %bb.0:
719+
; RV64I-NEXT: li a2, 26
720+
; RV64I-NEXT: mul a0, a0, a2
721+
; RV64I-NEXT: add a0, a0, a1
722+
; RV64I-NEXT: ret
723+
;
724+
; RV64ZBA-LABEL: addmul26:
725+
; RV64ZBA: # %bb.0:
726+
; RV64ZBA-NEXT: sh1add a2, a0, a0
727+
; RV64ZBA-NEXT: sh2add a0, a2, a0
728+
; RV64ZBA-NEXT: sh1add a0, a0, a1
729+
; RV64ZBA-NEXT: ret
730+
;
731+
; RV64XANDESPERF-LABEL: addmul26:
732+
; RV64XANDESPERF: # %bb.0:
733+
; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
734+
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
735+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
736+
; RV64XANDESPERF-NEXT: ret
737+
%c = mul i64 %a, 26
738+
%d = add i64 %c, %b
739+
ret i64 %d
740+
}
741+
675742
define i64 @addmul36(i64 %a, i64 %b) {
676743
; RV64I-LABEL: addmul36:
677744
; RV64I: # %bb.0:
@@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) {
722789
ret i64 %d
723790
}
724791

792+
define i64 @addmul38(i64 %a, i64 %b) {
793+
; RV64I-LABEL: addmul38:
794+
; RV64I: # %bb.0:
795+
; RV64I-NEXT: li a2, 38
796+
; RV64I-NEXT: mul a0, a0, a2
797+
; RV64I-NEXT: add a0, a0, a1
798+
; RV64I-NEXT: ret
799+
;
800+
; RV64ZBA-LABEL: addmul38:
801+
; RV64ZBA: # %bb.0:
802+
; RV64ZBA-NEXT: sh3add a2, a0, a0
803+
; RV64ZBA-NEXT: sh1add a0, a2, a0
804+
; RV64ZBA-NEXT: sh1add a0, a0, a1
805+
; RV64ZBA-NEXT: ret
806+
;
807+
; RV64XANDESPERF-LABEL: addmul38:
808+
; RV64XANDESPERF: # %bb.0:
809+
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
810+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
811+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
812+
; RV64XANDESPERF-NEXT: ret
813+
%c = mul i64 %a, 38
814+
%d = add i64 %c, %b
815+
ret i64 %d
816+
}
817+
818+
define i64 @addmul42(i64 %a, i64 %b) {
819+
; RV64I-LABEL: addmul42:
820+
; RV64I: # %bb.0:
821+
; RV64I-NEXT: li a2, 42
822+
; RV64I-NEXT: mul a0, a0, a2
823+
; RV64I-NEXT: add a0, a0, a1
824+
; RV64I-NEXT: ret
825+
;
826+
; RV64ZBA-LABEL: addmul42:
827+
; RV64ZBA: # %bb.0:
828+
; RV64ZBA-NEXT: sh2add a2, a0, a0
829+
; RV64ZBA-NEXT: sh2add a0, a2, a0
830+
; RV64ZBA-NEXT: sh1add a0, a0, a1
831+
; RV64ZBA-NEXT: ret
832+
;
833+
; RV64XANDESPERF-LABEL: addmul42:
834+
; RV64XANDESPERF: # %bb.0:
835+
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
836+
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
837+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
838+
; RV64XANDESPERF-NEXT: ret
839+
%c = mul i64 %a, 42
840+
%d = add i64 %c, %b
841+
ret i64 %d
842+
}
843+
725844
define i64 @addmul72(i64 %a, i64 %b) {
726845
; RV64I-LABEL: addmul72:
727846
; RV64I: # %bb.0:
@@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) {
747866
ret i64 %d
748867
}
749868

869+
define i64 @addmul74(i64 %a, i64 %b) {
870+
; RV64I-LABEL: addmul74:
871+
; RV64I: # %bb.0:
872+
; RV64I-NEXT: li a2, 74
873+
; RV64I-NEXT: mul a0, a0, a2
874+
; RV64I-NEXT: add a0, a0, a1
875+
; RV64I-NEXT: ret
876+
;
877+
; RV64ZBA-LABEL: addmul74:
878+
; RV64ZBA: # %bb.0:
879+
; RV64ZBA-NEXT: sh3add a2, a0, a0
880+
; RV64ZBA-NEXT: sh2add a0, a2, a0
881+
; RV64ZBA-NEXT: sh1add a0, a0, a1
882+
; RV64ZBA-NEXT: ret
883+
;
884+
; RV64XANDESPERF-LABEL: addmul74:
885+
; RV64XANDESPERF: # %bb.0:
886+
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
887+
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
888+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
889+
; RV64XANDESPERF-NEXT: ret
890+
%c = mul i64 %a, 74
891+
%d = add i64 %c, %b
892+
ret i64 %d
893+
}
894+
895+
define i64 @addmul82(i64 %a, i64 %b) {
896+
; RV64I-LABEL: addmul82:
897+
; RV64I: # %bb.0:
898+
; RV64I-NEXT: li a2, 82
899+
; RV64I-NEXT: mul a0, a0, a2
900+
; RV64I-NEXT: add a0, a0, a1
901+
; RV64I-NEXT: ret
902+
;
903+
; RV64ZBA-LABEL: addmul82:
904+
; RV64ZBA: # %bb.0:
905+
; RV64ZBA-NEXT: sh2add a2, a0, a0
906+
; RV64ZBA-NEXT: sh3add a0, a2, a0
907+
; RV64ZBA-NEXT: sh1add a0, a0, a1
908+
; RV64ZBA-NEXT: ret
909+
;
910+
; RV64XANDESPERF-LABEL: addmul82:
911+
; RV64XANDESPERF: # %bb.0:
912+
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
913+
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
914+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
915+
; RV64XANDESPERF-NEXT: ret
916+
%c = mul i64 %a, 82
917+
%d = add i64 %c, %b
918+
ret i64 %d
919+
}
920+
921+
define i64 @addmul146(i64 %a, i64 %b) {
922+
; RV64I-LABEL: addmul146:
923+
; RV64I: # %bb.0:
924+
; RV64I-NEXT: li a2, 146
925+
; RV64I-NEXT: mul a0, a0, a2
926+
; RV64I-NEXT: add a0, a0, a1
927+
; RV64I-NEXT: ret
928+
;
929+
; RV64ZBA-LABEL: addmul146:
930+
; RV64ZBA: # %bb.0:
931+
; RV64ZBA-NEXT: sh3add a2, a0, a0
932+
; RV64ZBA-NEXT: sh3add a0, a2, a0
933+
; RV64ZBA-NEXT: sh1add a0, a0, a1
934+
; RV64ZBA-NEXT: ret
935+
;
936+
; RV64XANDESPERF-LABEL: addmul146:
937+
; RV64XANDESPERF: # %bb.0:
938+
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
939+
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
940+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
941+
; RV64XANDESPERF-NEXT: ret
942+
%c = mul i64 %a, 146
943+
%d = add i64 %c, %b
944+
ret i64 %d
945+
}
946+
750947
define i64 @mul50(i64 %a) {
751948
; RV64I-LABEL: mul50:
752949
; RV64I: # %bb.0:

0 commit comments

Comments
 (0)