[X86][SSE] Add shufps+shufps test for fold through commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs so that the load can be folded as the second argument, since it will then permute the shufps result anyway.
This commit is contained in:
Simon Pilgrim 2020-01-24 11:16:16 +00:00
parent d4b092b341
commit e37cdbeeab

View File

@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3> %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x float> %shuffle ret <4 x float> %shuffle
} }
; Shuffle a vector loaded from memory (first shuffle operand) with a register
; argument (second shuffle operand) using the mask <0,6,2,4>.
;
; This records the current codegen for a shufps+shufps sequence. In the SSE and
; AVX1OR2 checks below the load is emitted as a separate (v)movaps, followed by
; a shufps of the two inputs and a second in-lane permute of the result.
; TODO - lowerShuffleWithSHUFPS should be able to commute the shufps inputs so
; that the load folds as the second operand, since the shufps result is
; permuted afterwards anyway (see D73023).
define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
; SSE-LABEL: shuffle_mem_v4f32_0624:
; SSE: # %bb.0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vmovaps (%rdi), %xmm1
; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovaps (%rdi), %xmm2
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4]
; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
; The loaded vector is the FIRST shuffle operand; the register argument %a0 is
; the second, so mask indices 4..7 select from %a0.
%1 = load <4 x float>, <4 x float>* %a1
; Result = { %1[0], %a0[2], %1[2], %a0[0] }.
%2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
ret <4 x float> %2
}