@@ -18,19 +18,6 @@ define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
1818 ret <4 x float > %r
1919}
2020
21- define <4 x float > @ext0_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
22- ; CHECK-LABEL: @ext0_v2f32v4f32(
23- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
24- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
25- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
26- ; CHECK-NEXT: ret <4 x float> [[R]]
27- ;
28- %e = extractelement <2 x float > %x , i32 0
29- %n = fneg float %e
30- %r = insertelement <4 x float > %y , float %n , i32 0
31- ret <4 x float > %r
32- }
33-
3421; Eliminating extract/insert is profitable.
3522
3623define <4 x float > @ext2_v4f32 (<4 x float > %x , <4 x float > %y ) {
@@ -45,19 +32,6 @@ define <4 x float> @ext2_v4f32(<4 x float> %x, <4 x float> %y) {
4532 ret <4 x float > %r
4633}
4734
48- define <4 x float > @ext2_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
49- ; CHECK-LABEL: @ext2_v2f32v4f32(
50- ; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
51- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 poison>
52- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
53- ; CHECK-NEXT: ret <4 x float> [[R]]
54- ;
55- %e = extractelement <2 x float > %x , i32 2
56- %n = fneg float %e
57- %r = insertelement <4 x float > %y , float %n , i32 2
58- ret <4 x float > %r
59- }
60-
6135; Eliminating extract/insert is still profitable. Flags propagate.
6236
6337define <2 x double > @ext1_v2f64 (<2 x double > %x , <2 x double > %y ) {
@@ -72,25 +46,6 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) {
7246 ret <2 x double > %r
7347}
7448
75- define <4 x double > @ext1_v2f64v4f64 (<2 x double > %x , <4 x double > %y ) {
76- ; SSE-LABEL: @ext1_v2f64v4f64(
77- ; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
78- ; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]]
79- ; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1
80- ; SSE-NEXT: ret <4 x double> [[R]]
81- ;
82- ; AVX-LABEL: @ext1_v2f64v4f64(
83- ; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]]
84- ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
85- ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
86- ; AVX-NEXT: ret <4 x double> [[R]]
87- ;
88- %e = extractelement <2 x double > %x , i32 1
89- %n = fneg nsz double %e
90- %r = insertelement <4 x double > %y , double %n , i32 1
91- ret <4 x double > %r
92- }
93-
9449; The vector fneg would cost twice as much as the scalar op with SSE,
9550; so we don't transform there (the shuffle would also be more expensive).
9651
@@ -112,19 +67,6 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) {
11267 ret <8 x float > %r
11368}
11469
115- define <8 x float > @ext7_v4f32v8f32 (<4 x float > %x , <8 x float > %y ) {
116- ; CHECK-LABEL: @ext7_v4f32v8f32(
117- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
118- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
119- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7
120- ; CHECK-NEXT: ret <8 x float> [[R]]
121- ;
122- %e = extractelement <4 x float > %x , i32 3
123- %n = fneg float %e
124- %r = insertelement <8 x float > %y , float %n , i32 7
125- ret <8 x float > %r
126- }
127-
12870; Same as above with an extra use of the extracted element.
12971
13072define <8 x float > @ext7_v8f32_use1 (<8 x float > %x , <8 x float > %y ) {
@@ -149,21 +91,6 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) {
14991 ret <8 x float > %r
15092}
15193
152- define <8 x float > @ext7_v4f32v8f32_use1 (<4 x float > %x , <8 x float > %y ) {
153- ; CHECK-LABEL: @ext7_v4f32v8f32_use1(
154- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
155- ; CHECK-NEXT: call void @use(float [[E]])
156- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
157- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
158- ; CHECK-NEXT: ret <8 x float> [[R]]
159- ;
160- %e = extractelement <4 x float > %x , i32 3
161- call void @use (float %e )
162- %n = fneg float %e
163- %r = insertelement <8 x float > %y , float %n , i32 3
164- ret <8 x float > %r
165- }
166-
16794; Negative test - the transform is likely not profitable if the fneg has another use.
16895
16996define <8 x float > @ext7_v8f32_use2 (<8 x float > %x , <8 x float > %y ) {
@@ -181,21 +108,6 @@ define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) {
181108 ret <8 x float > %r
182109}
183110
184- define <8 x float > @ext7_v4f32v8f32_use2 (<4 x float > %x , <8 x float > %y ) {
185- ; CHECK-LABEL: @ext7_v4f32v8f32_use2(
186- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
187- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
188- ; CHECK-NEXT: call void @use(float [[N]])
189- ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3
190- ; CHECK-NEXT: ret <8 x float> [[R]]
191- ;
192- %e = extractelement <4 x float > %x , i32 3
193- %n = fneg float %e
194- call void @use (float %n )
195- %r = insertelement <8 x float > %y , float %n , i32 3
196- ret <8 x float > %r
197- }
198-
199111; Negative test - can't convert variable index to a shuffle.
200112
201113define <2 x double > @ext_index_var_v2f64 (<2 x double > %x , <2 x double > %y , i32 %index ) {
@@ -211,19 +123,6 @@ define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %
211123 ret <2 x double > %r
212124}
213125
214- define <4 x double > @ext_index_var_v2f64v4f64 (<2 x double > %x , <4 x double > %y , i32 %index ) {
215- ; CHECK-LABEL: @ext_index_var_v2f64v4f64(
216- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 [[INDEX:%.*]]
217- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
218- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 [[INDEX]]
219- ; CHECK-NEXT: ret <4 x double> [[R]]
220- ;
221- %e = extractelement <2 x double > %x , i32 %index
222- %n = fneg nsz double %e
223- %r = insertelement <4 x double > %y , double %n , i32 %index
224- ret <4 x double > %r
225- }
226-
227126; Negative test - require same extract/insert index for simple shuffle.
228127; TODO: We could handle this by adjusting the cost calculation.
229128
@@ -240,33 +139,6 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) {
240139 ret <2 x double > %r
241140}
242141
243- ; Negative test - extract from an index greater than the vector width of the destination
244- define <2 x double > @ext3_v4f64v2f64 (<4 x double > %x , <2 x double > %y ) {
245- ; CHECK-LABEL: @ext3_v4f64v2f64(
246- ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3
247- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
248- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1
249- ; CHECK-NEXT: ret <2 x double> [[R]]
250- ;
251- %e = extractelement <4 x double > %x , i32 3
252- %n = fneg nsz double %e
253- %r = insertelement <2 x double > %y , double %n , i32 1
254- ret <2 x double > %r
255- }
256-
257- define <4 x double > @ext1_v2f64v4f64_ins0 (<2 x double > %x , <4 x double > %y ) {
258- ; CHECK-LABEL: @ext1_v2f64v4f64_ins0(
259- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
260- ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]]
261- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0
262- ; CHECK-NEXT: ret <4 x double> [[R]]
263- ;
264- %e = extractelement <2 x double > %x , i32 1
265- %n = fneg nsz double %e
266- %r = insertelement <4 x double > %y , double %n , i32 0
267- ret <4 x double > %r
268- }
269-
270142; Negative test - avoid changing poison ops
271143
272144define <4 x float > @ext12_v4f32 (<4 x float > %x , <4 x float > %y ) {
@@ -282,19 +154,6 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) {
282154 ret <4 x float > %r
283155}
284156
285- define <4 x float > @ext12_v2f32v4f32 (<2 x float > %x , <4 x float > %y ) {
286- ; CHECK-LABEL: @ext12_v2f32v4f32(
287- ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x float> [[X:%.*]], i32 6
288- ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
289- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 12
290- ; CHECK-NEXT: ret <4 x float> [[R]]
291- ;
292- %e = extractelement <2 x float > %x , i32 6
293- %n = fneg float %e
294- %r = insertelement <4 x float > %y , float %n , i32 12
295- ret <4 x float > %r
296- }
297-
298157; This used to crash because we assumed matching a true, unary fneg instruction.
299158
300159define <2 x float > @ext1_v2f32_fsub (<2 x float > %x ) {
@@ -322,16 +181,3 @@ define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) {
322181 %r = insertelement <2 x float > %y , float %s , i32 1
323182 ret <2 x float > %r
324183}
325-
326- define <4 x float > @ext1_v2f32v4f32_fsub_fmf (<2 x float > %x , <4 x float > %y ) {
327- ; CHECK-LABEL: @ext1_v2f32v4f32_fsub_fmf(
328- ; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]]
329- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
330- ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
331- ; CHECK-NEXT: ret <4 x float> [[R]]
332- ;
333- %e = extractelement <2 x float > %x , i32 1
334- %s = fsub nsz nnan float 0 .0 , %e
335- %r = insertelement <4 x float > %y , float %s , i32 1
336- ret <4 x float > %r
337- }
0 commit comments