teak-llvm/llvm/test/CodeGen/X86/vector-partial-undef.ll
Sanjay Patel 0279b5b0b8 [TargetLowering] try harder to determine undef elements of vector binops
This might be the start of tracking all vector element constants generally if we take it to its 
logical conclusion, but let's stop here and make sure this is correct/beneficial so far.

The affected tests require a convoluted path before they get simplified currently because we 
don't call SimplifyDemandedVectorElts() from binops directly and don't modify the binop operands 
directly in SimplifyDemandedVectorElts().

That's why the tests all have a trailing shuffle to induce a chain reaction of transforms. So 
something like this is happening:

1. Improve the knowledge of undefs in the binop via a SimplifyDemandedVectorElts() call that 
   originates from a shuffle.
2. Transfer that undef knowledge back to the shuffle mask user as more undef lanes.
3. Combine the modified shuffle by calling SimplifyDemandedVectorElts() again.
4. Translate the improved shuffle mask as undemanded lanes of build vector constants causing 
   those to become full undef constants.
5. Simplify the binop now that it has a full undef operand.

As we can see from the unchanged 'and' and 'or' tests, tracking undefs alone isn't a full solution. 
We would need to track zero and all-ones constants to improve those opcodes. We'd probably need to 
track NaN for FP ops too (assuming we don't have fast-math-flags set).

Differential Revision: https://reviews.llvm.org/D57066

llvm-svn: 352880
2019-02-01 15:35:12 +00:00

166 lines
6.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; xor undef, undef --> 0 because it's not worth fighting to make that return undef?
define <4 x i64> @xor_insert_insert(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: xor_insert_insert:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: xor_insert_insert:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%r = xor <4 x i64> %xw, %yw
ret <4 x i64> %r
}
define <4 x i64> @xor_insert_insert_high_half(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: xor_insert_insert_high_half:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm1
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: xor_insert_insert_high_half:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%r = xor <4 x i64> %xw, %yw
ret <4 x i64> %r
}
; All elements of the add are undefined:
; x[0] , x[1] , x[2] , x[3], u , u , u , u
; + u , u , u , u , 42 , 43 , 44 , 45
define <8 x i32> @add_undef_elts(<4 x i32> %x) {
; SSE-LABEL: add_undef_elts:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: add_undef_elts:
; AVX: # %bb.0:
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 4, i32 3, i32 2, i32 1, i32 7>
ret <8 x i32> %arbitrary_shuf
}
; Verify that constant operand 0 for a sub works too.
define <8 x i32> @sub_undef_elts(<4 x i32> %x) {
; SSE-LABEL: sub_undef_elts:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: sub_undef_elts:
; AVX: # %bb.0:
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 3, i32 2, i32 6, i32 7>
ret <8 x i32> %arbitrary_shuf
}
; and undef, C --> 0, so this tests that we are tracking known zero lanes.
define <4 x i64> @and_undef_elts(<2 x i64> %x) {
; SSE-LABEL: and_undef_elts:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: and_undef_elts:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2]
; AVX-NEXT: retq
%extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%bogus_bo = and <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
%arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ret <4 x i64> %arbitrary_shuf
}
; or undef, C --> -1, so this tests that we are tracking known all-ones lanes.
define <4 x i64> @or_undef_elts(<2 x i64> %x) {
; SSE-LABEL: or_undef_elts:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: or_undef_elts:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2]
; AVX-NEXT: retq
%extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%bogus_bo = or <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
%arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ret <4 x i64> %arbitrary_shuf
}
; Verify that this isn't limited to high/low halves.
define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: xor_undef_elts:
; AVX: # %bb.0:
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
ret <8 x i32> %arbitrary_shuf
}
; Verify that this isn't limited to high/low halves
; Special case: the undef-ness of the 1st shuffle may be lost if we turn that into vector concat.
define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts_alt:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movaps {{.*#+}} xmm2 = <u,u,44,12>
; SSE-NEXT: xorps %xmm0, %xmm2
; SSE-NEXT: xorps {{.*}}(%rip), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
; SSE-NEXT: retq
;
; AVX-LABEL: xor_undef_elts_alt:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
ret <8 x i32> %arbitrary_shuf
}