teak-llvm/llvm/test/CodeGen/X86/shuffle-extract-subvector.ll
Justin Bogner b353d6887e [DAGCombine] Fix a miscompile when reducing BUILD_VECTORs to a shuffle
In r311255 we added a case where we split vectors whose elements are
all derived from the same input vector so that we could shuffle it
more efficiently. In doing so, createBuildVecShuffle was taught to
adjust for the fact that all indices would be based off of the first
vector when this happens, but it's possible for the code that checked
that to fire incorrectly if we happen to have a BUILD_VECTOR of
extracts from subvectors and don't hit this new optimization.

Instead of trying to detect if we've split the vector by checking if
we have extracts from the same base vector, we can just pass that
information into createBuildVecShuffle, avoiding the miscompile.

Differential Revision: https://reviews.llvm.org/D59507

llvm-svn: 356476
2019-03-19 16:52:00 +00:00

49 lines
2.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; @f: reduced reproducer that interleaves two <4 x half> vectors into one
; <8 x half> vector, going through two-element shuffles and an
; extractelement/insertelement chain rather than one wide shufflevector.
;
;   %a - pointer to the first  <4 x half> input  (elements a0..a3)
;   %b - pointer to the second <4 x half> input  (elements b0..b3)
;   %c - pointer to the <8 x half> output, written as
;        a0, b0, a1, b1, a2, b2, a3, b3
;
; The assertions below expect eight 16-bit zero-extending loads (four from
; each input pointer) followed by eight 16-bit stores at offsets 14 down to 0
; from the output pointer, with the values alternating between the two
; inputs. They are autogenerated; regenerate them with the script named at
; the top of the file instead of editing them by hand.
; NOTE(review): the file name and commit message suggest this guards the
; BUILD_VECTOR-to-shuffle DAG combine against a miscompile when the operands
; are extracts from subvectors - confirm against D59507.
define void @f(<4 x half>* %a, <4 x half>* %b, <8 x half>* %c) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movzwl (%rdi), %r8d
; CHECK-NEXT: movzwl 2(%rdi), %r9d
; CHECK-NEXT: movzwl 4(%rdi), %r11d
; CHECK-NEXT: movzwl 6(%rdi), %edi
; CHECK-NEXT: movzwl (%rsi), %r10d
; CHECK-NEXT: movzwl 2(%rsi), %ecx
; CHECK-NEXT: movzwl 4(%rsi), %eax
; CHECK-NEXT: movzwl 6(%rsi), %esi
; CHECK-NEXT: movw %si, 14(%rdx)
; CHECK-NEXT: movw %di, 12(%rdx)
; CHECK-NEXT: movw %ax, 10(%rdx)
; CHECK-NEXT: movw %r11w, 8(%rdx)
; CHECK-NEXT: movw %cx, 6(%rdx)
; CHECK-NEXT: movw %r9w, 4(%rdx)
; CHECK-NEXT: movw %r10w, 2(%rdx)
; CHECK-NEXT: movw %r8w, (%rdx)
; CHECK-NEXT: retq
; Load both four-element inputs.
%tmp4 = load <4 x half>, <4 x half>* %a
%tmp5 = load <4 x half>, <4 x half>* %b
; Build four two-element vectors. Mask indices 0-3 select from %tmp4 and 4-7
; select from %tmp5, so mask <i, i+4> yields the pair (a[i], b[i]).
%tmp7 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 0, i32 4>
%tmp8 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 1, i32 5>
%tmp9 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 2, i32 6>
%tmp10 = shufflevector <4 x half> %tmp4, <4 x half> %tmp5, <2 x i32> <i32 3, i32 7>
; Scalarize each pair and insert its two elements into consecutive lanes of
; the <8 x half> result, starting from undef. Pair (a0, b0) fills lanes 0-1.
%tmp11 = extractelement <2 x half> %tmp7, i32 0
%tmp12 = insertelement <8 x half> undef, half %tmp11, i32 0
%tmp13 = extractelement <2 x half> %tmp7, i32 1
%tmp14 = insertelement <8 x half> %tmp12, half %tmp13, i32 1
; Pair (a1, b1) fills lanes 2-3.
%tmp15 = extractelement <2 x half> %tmp8, i32 0
%tmp16 = insertelement <8 x half> %tmp14, half %tmp15, i32 2
%tmp17 = extractelement <2 x half> %tmp8, i32 1
%tmp18 = insertelement <8 x half> %tmp16, half %tmp17, i32 3
; Pair (a2, b2) fills lanes 4-5.
%tmp19 = extractelement <2 x half> %tmp9, i32 0
%tmp20 = insertelement <8 x half> %tmp18, half %tmp19, i32 4
%tmp21 = extractelement <2 x half> %tmp9, i32 1
%tmp22 = insertelement <8 x half> %tmp20, half %tmp21, i32 5
; Pair (a3, b3) fills lanes 6-7.
%tmp23 = extractelement <2 x half> %tmp10, i32 0
%tmp24 = insertelement <8 x half> %tmp22, half %tmp23, i32 6
%tmp25 = extractelement <2 x half> %tmp10, i32 1
%tmp26 = insertelement <8 x half> %tmp24, half %tmp25, i32 7
; Write the fully populated eight-element vector to the output pointer.
store <8 x half> %tmp26, <8 x half>* %c
ret void
}