mirror of
https://github.com/Gericom/teak-llvm.git
synced 2025-06-19 19:45:40 -04:00

This might be the start of tracking all vector element constants generally if we take it to its logical conclusion, but let's stop here and make sure this is correct/beneficial so far. The affected tests require a convoluted path before they get simplified currently because we don't call SimplifyDemandedVectorElts() from binops directly and don't modify the binop operands directly in SimplifyDemandedVectorElts(). That's why the tests all have a trailing shuffle to induce a chain reaction of transforms. So something like this is happening: 1. Improve the knowledge of undefs in the binop via a SimplifyDemandedVectorElts() call that originates from a shuffle. 2. Transfer that undef knowledge back to the shuffle mask user as more undef lanes. 3. Combine the modified shuffle by calling SimplifyDemandedVectorElts() again. 4. Translate the improved shuffle mask as undemanded lanes of build vector constants causing those to become full undef constants. 5. Simplify the binop now that it has a full undef operand. As we can see from the unchanged 'and' and 'or' tests, tracking undefs alone isn't a full solution. We would need to track zero and all-ones constants to improve those opcodes. We'd probably need to track NaN for FP ops too (assuming we don't have fast-math-flags set). Differential Revision: https://reviews.llvm.org/D57066 llvm-svn: 352880
166 lines
6.7 KiB
LLVM
166 lines
6.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
|
|
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX
|
|
|
|
; xor undef, undef --> 0 because it's not worth fighting to make that return undef?
|
|
|
|
; Both <2 x i64> arguments are widened to <4 x i64> by a shufflevector whose
; mask keeps source lanes 0-1 in the low half and leaves lanes 2-3 undef. The
; widened values are then xor'ed, so result lanes 2-3 are "xor undef, undef".
; The +sse2 checks expect one xorps combining the arguments plus a xorps of
; %xmm1 with itself; the +avx2 checks expect only a single xmm-register vxorps.
define <4 x i64> @xor_insert_insert(<2 x i64> %x, <2 x i64> %y) {
|
|
; SSE-LABEL: xor_insert_insert:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: xorps %xmm1, %xmm0
|
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: xor_insert_insert:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: retq
|
|
; Widen %x: result lanes 0-1 = x[0], x[1]; lanes 2-3 = undef.
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; Widen %y with the same mask, so its undef lanes line up with those of %xw.
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
%r = xor <4 x i64> %xw, %yw
|
|
ret <4 x i64> %r
|
|
}
|
|
|
|
; Same shape as a widening xor of two <2 x i64> arguments, but the shuffle
; masks place the source lanes in result lanes 2-3 and leave lanes 0-1 undef,
; so the defined data lives in the high half of the <4 x i64> result.
; The +sse2 checks expect the xor of the arguments to be formed in %xmm1 and
; %xmm0 to be xor'ed with itself; the +avx2 checks expect an xmm vxorps, a
; self-vxorps of %xmm1, and a vinsertf128 into the upper half of the ymm result.
define <4 x i64> @xor_insert_insert_high_half(<2 x i64> %x, <2 x i64> %y) {
|
|
; SSE-LABEL: xor_insert_insert_high_half:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: xorps %xmm0, %xmm1
|
|
; SSE-NEXT: xorps %xmm0, %xmm0
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: xor_insert_insert_high_half:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
|
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
|
; AVX-NEXT: retq
|
|
; Widen %x: result lanes 0-1 = undef; lanes 2-3 = x[0], x[1].
%xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
|
|
; Widen %y with the same mask, so its undef lanes line up with those of %xw.
%yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
|
|
%r = xor <4 x i64> %xw, %yw
|
|
ret <4 x i64> %r
|
|
}
|
|
|
|
; All elements of the add are undefined:
|
|
; x[0] , x[1] , x[2] , x[3], u , u , u , u
|
|
; + u , u , u , u , 42 , 43 , 44 , 12
|
|
|
|
; Adds a widened copy of %x to a partially-undef constant such that every lane
; of the add has exactly one undef operand, then permutes the sum.
; Both the +sse2 and +avx2 checks expect the function body to be just retq,
; i.e. no add or shuffle instructions are emitted.
define <8 x i32> @add_undef_elts(<4 x i32> %x) {
|
|
; SSE-LABEL: add_undef_elts:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: add_undef_elts:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: retq
|
|
; Result lanes 0-3 = x[0..3]; lanes 4-7 = undef.
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; Constant lanes 0-3 are undef and lanes 4-7 are defined (42, 43, 44, 12), the
; complement of %extend's defined lanes.
%bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
|
|
; Single-source permutation: every mask index is below 8, so only %bogus_bo is
; read and the undef second operand is never selected.
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 4, i32 3, i32 2, i32 1, i32 7>
|
|
ret <8 x i32> %arbitrary_shuf
|
|
}
|
|
|
|
; Verify that constant operand 0 for a sub works too.
|
|
|
|
; Subtracts a widened copy of %x from a partially-undef constant; here the
; constant is operand 0 of the binop rather than operand 1. Every lane of the
; sub has exactly one undef operand, and the difference is then permuted.
; Both the +sse2 and +avx2 checks expect the function body to be just retq.
define <8 x i32> @sub_undef_elts(<4 x i32> %x) {
|
|
; SSE-LABEL: sub_undef_elts:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: sub_undef_elts:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: retq
|
|
; Result lanes 0-3 = x[0..3]; lanes 4-7 = undef.
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
; Constant minuend: lanes 0-3 undef, lanes 4-7 defined (42, 43, 44, 12), the
; complement of %extend's defined lanes.
%bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
|
|
; Single-source permutation: every mask index is below 8, so only %bogus_bo is
; read and the undef second operand is never selected.
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 3, i32 2, i32 6, i32 7>
|
|
ret <8 x i32> %arbitrary_shuf
|
|
}
|
|
|
|
; and undef, C --> 0, so this tests that we are tracking known zero lanes.
|
|
|
|
; ANDs a widened copy of %x with a partially-undef constant so that every lane
; of the 'and' has exactly one undef operand, then rotates the lanes.
; The +sse2 checks expect both result registers to be produced by xor'ing a
; register with itself. The +avx2 checks still expect a vandps against a
; RIP-relative memory operand followed by a vpermpd, so the operation is not
; folded away in that configuration.
define <4 x i64> @and_undef_elts(<2 x i64> %x) {
|
|
; SSE-LABEL: and_undef_elts:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: xorps %xmm0, %xmm0
|
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: and_undef_elts:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
|
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
|
; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2]
|
|
; AVX-NEXT: retq
|
|
; Result lanes 0-1 = x[0], x[1]; lanes 2-3 = undef.
%extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; Constant lanes 0-1 are undef and lanes 2-3 are defined (42, 43), the
; complement of %extend's defined lanes.
%bogus_bo = and <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
|
|
; Single-source lane rotation: result = bogus_bo[3], [0], [1], [2].
%arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
|
|
ret <4 x i64> %arbitrary_shuf
|
|
}
|
|
|
|
; or undef, C --> -1, so this tests that we are tracking known all-ones lanes.
|
|
|
|
; ORs a widened copy of %x with a partially-undef constant so that every lane
; of the 'or' has exactly one undef operand, then rotates the lanes.
; The +sse2 checks expect both result registers to be produced by pcmpeqd of a
; register with itself. The +avx2 checks still expect a vorps against a
; RIP-relative memory operand followed by a vpermpd, so the operation is not
; folded away in that configuration.
define <4 x i64> @or_undef_elts(<2 x i64> %x) {
|
|
; SSE-LABEL: or_undef_elts:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
|
|
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: or_undef_elts:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
|
; AVX-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
|
|
; AVX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2]
|
|
; AVX-NEXT: retq
|
|
; Result lanes 0-1 = x[0], x[1]; lanes 2-3 = undef.
%extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
|
; Constant lanes 0-1 are undef and lanes 2-3 are defined (42, 43), the
; complement of %extend's defined lanes.
%bogus_bo = or <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
|
|
; Single-source lane rotation: result = bogus_bo[3], [0], [1], [2].
%arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
|
|
ret <4 x i64> %arbitrary_shuf
|
|
}
|
|
|
|
; Verify that this isn't limited to high/low halves.
|
|
|
|
; XORs a scattered, widened copy of %x with a partially-undef constant. The
; defined lanes of the two operands are complementary and sit in the middle
; and at the ends of the vector rather than in one contiguous half, so every
; lane of the xor has exactly one undef operand. The result is then permuted.
; Both the +sse2 and +avx2 checks expect the function body to be just retq.
define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
|
|
; SSE-LABEL: xor_undef_elts:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: xor_undef_elts:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: retq
|
|
; Result lanes 2-5 = x[1], x[3], x[0], x[2]; lanes 0, 1, 6 and 7 = undef.
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
|
|
; Constant is defined only in lanes 0, 1, 6 and 7 (42, 43, 44, 12) and undef
; in lanes 2-5, the complement of %extend's defined lanes.
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
|
|
; Single-source permutation: every mask index is below 8, so only %bogus_bo is
; read and the undef second operand is never selected.
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
|
|
ret <8 x i32> %arbitrary_shuf
|
|
}
|
|
|
|
; Verify that this isn't limited to high/low halves
|
|
; Special case: the undef-ness of the 1st shuffle may be lost if we turn that into vector concat.
|
|
|
|
; Variant of the scattered-lane xor test in which the widening shuffle keeps
; the source lanes in order within each pair (x[2], x[3], x[0], x[1]) instead
; of interleaving them. Every lane of the xor still has exactly one undef
; operand, but the expected output is not reduced to a bare retq: the +sse2
; checks expect xorps and shufps instructions, and the +avx2 checks expect a
; vinsertf128 that places %xmm0 in the upper half of %ymm0, a vxorps against a
; RIP-relative memory operand, and a vpermps using the mask [6,1,5,4,3,2,0,7].
define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
|
|
; SSE-LABEL: xor_undef_elts_alt:
|
|
; SSE: # %bb.0:
|
|
; SSE-NEXT: movaps %xmm0, %xmm1
|
|
; SSE-NEXT: movaps {{.*#+}} xmm2 = <u,u,44,12>
|
|
; SSE-NEXT: xorps %xmm0, %xmm2
|
|
; SSE-NEXT: xorps {{.*}}(%rip), %xmm1
|
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
|
|
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
|
|
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
|
|
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
|
|
; SSE-NEXT: retq
|
|
;
|
|
; AVX-LABEL: xor_undef_elts_alt:
|
|
; AVX: # %bb.0:
|
|
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
|
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
|
; AVX-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
|
|
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
|
|
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
|
|
; AVX-NEXT: retq
|
|
; Result lanes 2-5 = x[2], x[3], x[0], x[1]; lanes 0, 1, 6 and 7 = undef.
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
|
|
; Constant is defined only in lanes 0, 1, 6 and 7 (42, 43, 44, 12) and undef
; in lanes 2-5, the complement of %extend's defined lanes.
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
|
|
; Single-source permutation: every mask index is below 8, so only %bogus_bo is
; read and the undef second operand is never selected.
%arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
|
|
ret <8 x i32> %arbitrary_shuf
|
|
}
|
|
|