@@ -1243,3 +1243,81 @@ bb:
12431243 store volatile i8 4 , ptr addrspace (5 ) %p4
12441244 ret void
12451245}
1246+
; Kernel test: a volatile i8 store into a 64-byte addrspace(5) alloca whose
; address is built as  base + %soff (kernel argument) + workitem.id.x + (-1).
; The per-target check lines below record how the constant -1 ends up in the
; emitted store:
;   GFX940 (SDAG and GISEL): -1 is added into the VGPR address (v_add_u32 /
;     v_add3_u32) and the store carries no "offset:" field.
;   GFX11 (SDAG and GISEL) and GFX12-GISEL: s0 is added into the VGPR address
;     and the store uses "off offset:-1".
;   GFX12-SDAG: the store takes s0 (written by the preceding s_load_b32)
;     directly as an operand alongside v0, with "offset:-1".
; NOTE(review): the check lines look tool-generated (presumably by
; update_llc_test_checks.py) - confirm against the file header and regenerate
; them rather than editing by hand.
1247+ define amdgpu_kernel void @soff1_voff1_negative (i32 %soff ) {
1248+ ; GFX940-SDAG-LABEL: soff1_voff1_negative:
1249+ ; GFX940-SDAG: ; %bb.0: ; %bb
1250+ ; GFX940-SDAG-NEXT: s_load_dword s0, s[4:5], 0x24
1251+ ; GFX940-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1252+ ; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
1253+ ; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1254+ ; GFX940-SDAG-NEXT: v_add_u32_e32 v0, s0, v0
1255+ ; GFX940-SDAG-NEXT: v_add_u32_e32 v0, -1, v0
1256+ ; GFX940-SDAG-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1257+ ; GFX940-SDAG-NEXT: s_waitcnt vmcnt(0)
1258+ ; GFX940-SDAG-NEXT: s_endpgm
1259+ ;
1260+ ; GFX940-GISEL-LABEL: soff1_voff1_negative:
1261+ ; GFX940-GISEL: ; %bb.0: ; %bb
1262+ ; GFX940-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24
1263+ ; GFX940-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1264+ ; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 1
1265+ ; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1266+ ; GFX940-GISEL-NEXT: s_add_u32 s0, 0, s0
1267+ ; GFX940-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1
1268+ ; GFX940-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1
1269+ ; GFX940-GISEL-NEXT: s_waitcnt vmcnt(0)
1270+ ; GFX940-GISEL-NEXT: s_endpgm
1271+ ;
1272+ ; GFX11-SDAG-LABEL: soff1_voff1_negative:
1273+ ; GFX11-SDAG: ; %bb.0: ; %bb
1274+ ; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
1275+ ; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1276+ ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1277+ ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1278+ ; GFX11-SDAG-NEXT: v_add3_u32 v0, 0, s0, v0
1279+ ; GFX11-SDAG-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
1280+ ; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
1281+ ; GFX11-SDAG-NEXT: s_endpgm
1282+ ;
1283+ ; GFX11-GISEL-LABEL: soff1_voff1_negative:
1284+ ; GFX11-GISEL: ; %bb.0: ; %bb
1285+ ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
1286+ ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1287+ ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1288+ ; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0
1289+ ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1290+ ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
1291+ ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc
1292+ ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
1293+ ; GFX11-GISEL-NEXT: s_endpgm
1294+ ;
1295+ ; GFX12-SDAG-LABEL: soff1_voff1_negative:
1296+ ; GFX12-SDAG: ; %bb.0: ; %bb
1297+ ; GFX12-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x24
1298+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1299+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1300+ ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:-1 scope:SCOPE_SYS
1301+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1302+ ; GFX12-SDAG-NEXT: s_endpgm
1303+ ;
1304+ ; GFX12-GISEL-LABEL: soff1_voff1_negative:
1305+ ; GFX12-GISEL: ; %bb.0: ; %bb
1306+ ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24
1307+ ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0
1308+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1309+ ; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0
1310+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1311+ ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0
1312+ ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS
1313+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
1314+ ; GFX12-GISEL-NEXT: s_endpgm
1315+ bb:
; 64-byte, 4-byte-aligned stack object in addrspace(5); its base appears as
; the literal 0 in the s_add / v_add3 operands of the check lines above.
1316+ %a = alloca [64 x i8 ], align 4 , addrspace (5 )
; First offset: the i32 kernel argument.
1317+ %as = getelementptr i8 , ptr addrspace (5 ) %a , i32 %soff
; Second offset: the work-item id in x, which differs per lane.
1318+ %voff = call i32 @llvm.amdgcn.workitem.id.x ()
1319+ %asv = getelementptr i8 , ptr addrspace (5 ) %as , i32 %voff
; Final offset: the constant -1, applied after both variable offsets.
1320+ %p1 = getelementptr i8 , ptr addrspace (5 ) %asv , i32 -1
; Volatile so the store is kept even though the alloca is never read.
1321+ store volatile i8 1 , ptr addrspace (5 ) %p1
1322+ ret void
1323+ }
0 commit comments