Adds the regression test shuffle_mem_v4f32_0624 to vector-shuffle-128-v4.ll.
The test loads a <4 x float> from %a1 and shuffles it (as the first operand)
with %a0 (as the second operand) using the mask <0, 6, 2, 4>: mask indices
0-3 select lanes of the loaded vector, indices 4-7 select lanes of %a0.
The expected code is checked under the SSE, AVX1OR2 and AVX512VL prefixes.

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e73f9c..0462caf23fe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle
 }
+
+define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT:    vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %a1
+  %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  ret <4 x float> %2
+}